Requirements
All analyses were performed using R version 4.2.0. The
.Rmd file must be located in the working directory, along with the
sample_table.csv file, which corresponds to the metadata,
and the 0_DADA2_output folder, which contains the output
files from the DADA2 pipeline for the 16S and ITS libraries:
seqtab.nochim_16S.rdata and
seqtab.nochim_ITS.rdata for the abundance tables, and the
taxo_tab_16S.rdata and taxo_tab_ITS.rdata
files, which correspond to the taxonomic affiliations. These files are
necessary to run the entire script that follows.
Packages used
# install.packages("anyLib")
# library(anyLib) # This package allows the installation of the requested packages along with all necessary dependencies.
#
# needed.packages <- c("ade4", "Biostrings", "cluster", "FSA", "gclus", "ggplot2", "ggpubr", "graphics", "igraph", "kableExtra", "knitr", "magick", "magrittr", "MASS", "microeco", "multcompView", "plotly", "psych", "RColorBrewer", "rcompanion", "rstatix", "tidyverse", "vegan", "webshot", "dplyr", "DT", "plyr", "purrr", "venn")
#
# anyLib(needed.packages)
Preprocessing
Creating and saving the fasta file, abundance table and taxonomic table with ASV IDs
# Chunk setup: start from a clean workspace and work inside the analysis
# directory. NOTE(review): the absolute path is machine-specific — adapt it
# before running on another computer.
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set working directory (hard-coded, see note above)
# Output folder creation (skipped if it already exists)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "1_formatted_files" # destination of the formatted tables and fasta file
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))
ID.fasta <- function(libraries) {
  # libraries: "16S" (bacterial library) or "ITS" (fungal library).
  # Reads the DADA2 outputs, merges abundance and taxonomy by raw sequence,
  # assigns zero-padded ASV IDs, then writes a fasta file plus an abundance
  # table and a taxonomic table (both RDS) into 1_formatted_files/.

  # Abundance table produced by DADA2 (loads the object `seqtab.nochim`)
  load(paste0("0_DADA2_output/seqtab.nochim_", libraries, ".rdata"))
  abund <- data.frame(t(seqtab.nochim))
  abund$seq <- row.names(abund) # raw sequences, used as the merge key

  # Taxonomy table produced by DADA2 (loads the object `taxotab`)
  load(paste0("0_DADA2_output/taxo_tab_", libraries, ".rdata"))
  taxo <- data.frame(taxotab)
  taxo$seq <- row.names(taxo) # raw sequences, used as the merge key

  print("Dimensions before the merger:")
  print(dim(abund))
  print(dim(taxo))
  # Full outer join on the raw sequence
  merged <- merge(taxo, abund, by = "seq", all.x = TRUE, all.y = TRUE)
  print("Dimensions after the merger:")
  print(dim(merged))

  # One zero-padded identifier per sequence, e.g. "ASV0001_16S"
  merged$ASV_ID <- sprintf(paste0("ASV%04d_", libraries), seq_along(merged$seq))
  row.names(merged) <- merged$ASV_ID

  # Fasta file: one ">ID\nsequence" record per ASV
  writeLines(
    paste0(">", merged$ASV_ID, "\n", merged$seq),
    paste0("1_formatted_files/ASV_seq_", libraries, ".fasta")
  )

  taxo_ranks <- c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")
  # Abundance table = every sample column (everything but ranks, ID and sequence)
  abund_out <- merged[, !(names(merged) %in% c(taxo_ranks, "ASV_ID", "seq"))]
  # Taxonomic table = the seven ranks only (row names carry the ASV IDs)
  taxo_out <- merged[, taxo_ranks]
  saveRDS(abund_out,
    file = paste0("1_formatted_files/ASV_tab_all_", libraries, ".RDS")
  ) # Saving the abundance table
  saveRDS(taxo_out,
    file = paste0("1_formatted_files/taxo_tab_all_", libraries, ".RDS")
  ) # Saving the taxonomic table
}
# Format both libraries; the "##" lines below are the console output captured
# when the document was rendered (table dimensions before/after the merge).
ID.fasta("16S")
## [1] "Dimensions before the merger:"
## [1] 5963 165
## [1] 5963 8
## [1] "Dimensions after the merger:"
## [1] 5963 172
ID.fasta("ITS")
## [1] "Dimensions before the merger:"
## [1] 7257 165
## [1] 7257 8
## [1] "Dimensions after the merger:"
## [1] 7257 172
Correcting abundance tables according to negative controls
# Chunk setup: clean workspace, move to the analysis directory and create the
# output folder of this step. NOTE(review): hard-coded, machine-specific path.
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation (skipped if it already exists)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "2_corrected_files" # destination of the control-corrected tables
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))
correct.tab <- function(libraries) {
# libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
# Purpose: correct the abundance table for contamination. Within each
# "species x compartment" group, the mean of that group's negative-control
# ("Tneg") columns is subtracted from every sample of the group, negative
# counts are clamped to 0, and the corrected groups are merged back into a
# single table saved under 2_corrected_files/.
library(plyr)
# Loading the abundance table
ASV_tab <- readRDS(paste0("1_formatted_files/ASV_tab_all_", libraries, ".RDS"))
# Creating the abundance subtables for each "species" (Pg, Ps, and Bulk soil (=Bs))
Pg_tab <- subset(ASV_tab, select = grep("_Pg", names(ASV_tab))) # Pg abundance subtable
Ps_tab <- subset(ASV_tab, select = grep("_Ps", names(ASV_tab))) # Ps abundance subtable
Bulk_soil_df <- subset(ASV_tab, select = grep("_SNR", names(ASV_tab))) # Bs abundance subtable
# Creating the abundance subtables for each "species" (Pg, Ps, and Bulk soil (=Bs)) and "compartments" (leaves, pulps, seeds, roots and soil)
# NOTE(review): the short patterns "_F", "_G", "_R" and "_S" are assumed to be
# unambiguous within each species table — confirm against the real sample
# names (e.g. "_S" would also match any column name containing "_SNR").
Pg_Leaves_df <- subset(Pg_tab, select = grep("_F", names(Pg_tab))) # Pg_leaves abundance subtable
Pg_Pulps_Seeds_df <- subset(Pg_tab, select = c(
grep("ADN_P", names(Pg_tab), fixed = TRUE),
grep("Tneg_P", names(Pg_tab), fixed = TRUE),
grep("_G", names(Pg_tab), fixed = TRUE)
)) # Pg_pulps_seeds abundance subtable (pulps and seeds share one negative control)
Pg_Roots_df <- subset(Pg_tab, select = grep("_R", names(Pg_tab))) # Pg_roots abundance subtable
Pg_Soil_df <- subset(Pg_tab, select = grep("_S", names(Pg_tab))) # Pg_soil abundance subtable
Ps_Leaves_df <- subset(Ps_tab, select = grep("_F", names(Ps_tab))) # Ps_leaves abundance subtable
Ps_Pulps_Seeds_df <- subset(Ps_tab, select = c(
grep("ADN_P", names(Ps_tab), fixed = TRUE),
grep("Tneg_P", names(Ps_tab), fixed = TRUE),
grep("_G", names(Ps_tab), fixed = TRUE)
)) # Ps_pulps_seeds abundance subtable (pulps and seeds share one negative control)
Ps_Roots_df <- subset(Ps_tab, select = grep("_R", names(Ps_tab))) # Ps_roots abundance subtable
Ps_Soil_df <- subset(Ps_tab, select = grep("_S", names(Ps_tab))) # Ps_soil abundance subtable
# Correcting tables, one species x compartment group at a time
group_names <- c(
"Pg_Leaves_df",
"Pg_Pulps_Seeds_df",
"Pg_Roots_df",
"Pg_Soil_df",
"Ps_Leaves_df",
"Ps_Pulps_Seeds_df",
"Ps_Roots_df",
"Ps_Soil_df",
"Bulk_soil_df"
)
all_results <- list()
for (group_name in group_names) {
# Loading the subtable by name from the function environment
group <- get(group_name)
# Getting the "Tneg" pattern (corresponding to negative controls)
col_indices <- grep("Tneg", colnames(group))
# Subtracts the mean of the "Tneg" (negative control) columns from each row.
# Note: the mean is taken over ALL Tneg columns of the group (na.rm only
# drops NA values, not zeros — the previous comment saying "non-zero" did
# not match the code). NOTE(review): if a group had no Tneg column at all,
# mean() of an empty selection would be NaN and the clamping assignment
# below would fail ("NAs are not allowed in subscripted assignments") —
# confirm every group has at least one negative control.
subtract_mean <- function(row) {
mean_values <- mean(row[col_indices], na.rm = TRUE)
result <- row - mean_values
result[result < 0] <- 0 # Replacing negative values by 0
return(result)
}
# Applying the function to each sub-table row (apply() coerces the data
# frame to a matrix; all columns are numeric counts, so this is safe)
result_df <- as.data.frame(t(apply(group, 1, subtract_mean)))
# Assigning ASV IDs for row names
result_df$RowNames <- rownames(group)
# Assigning original data frame column names to result_df
colnames(result_df) <- c(colnames(group), "RowNames")
# Adding result to list
all_results[[group_name]] <- result_df
}
# Merging all results in a single dataframe
final_result <- all_results[[1]] # Initializing with the first data frame
# Merging with the other data frames (full outer join on the ASV ID)
# NOTE(review): assumes at least two groups; with a single group "2:1" would
# iterate backwards.
for (i in 2:length(all_results)) {
final_result <- merge(final_result, all_results[[i]], by = "RowNames", all = TRUE)
}
# Replacing NA by 0 (ASVs absent from a group after the outer join)
final_result[is.na(final_result)] <- 0
# Removing "Tneg" columns, no longer needed after the correction
# NOTE(review): assumes at least one column matches "Tneg"; an empty index
# in "-cols_to_remove" would select zero columns instead of all of them.
cols_to_remove <- grep("Tneg", colnames(final_result))
final_result <- final_result[, -cols_to_remove]
row.names(final_result) <- final_result$RowNames
final_result <- final_result[, -1] # Drop the now-redundant "RowNames" column
# Saving the corrected abundance table
saveRDS(final_result, file = paste0("2_corrected_files/ASV_tab_all_corrected_", libraries, ".RDS"))
}
# Apply the negative-control correction to both libraries
correct.tab("16S")
correct.tab("ITS")
Removal of non-bacterial/fungal taxa
# Chunk setup: clean workspace, move to the analysis directory and create the
# output folder of this step. NOTE(review): hard-coded, machine-specific path.
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation (skipped if it already exists)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "3_filtered_files" # destination of the taxon-filtered tables
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))
filt_taxa <- function(libraries) {
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # Purpose: build a microeco microtable from the corrected abundance table,
  # the taxonomy, the sample metadata and the representative sequences, keep
  # only the target kingdom (Bacteria for 16S, Fungi for ITS), drop organelle
  # reads, and save the filtered abundance table, taxonomy and fasta.
  # Refactor notes: the previously duplicated 16S/ITS branches (identical
  # except for the kingdom string) and the seven copy-pasted "Unclassified"
  # lines are unified; `header = T` replaced by `header = TRUE`.
  library(microeco)
  library(magrittr)
  library(Biostrings)
  # Loading of corrected abundance table, taxonomic table, sample metadata and fasta file
  ASV_tab <- readRDS(paste0("2_corrected_files/ASV_tab_all_corrected_", libraries, ".RDS")) # corrected abundance table
  taxo_tab <- readRDS(paste0("1_formatted_files/taxo_tab_all_", libraries, ".RDS")) # taxonomic table
  taxo_tab %<>% tidy_taxonomy() # adds rank prefixes, e.g. "Bacteria" -> "k__Bacteria"
  sample_data <- read.csv("sample_table.csv",
    sep = ";",
    header = TRUE, row.names = "Sample_ID"
  )
  fasta_file <- paste0("1_formatted_files/ASV_seq_", libraries, ".fasta") # Path to fasta file
  fasta <- readDNAStringSet(fasta_file) # Reading the fasta file
  # Creating a microtable object with microeco package
  dataset <- microtable$new(
    otu_table = ASV_tab,
    tax_table = taxo_tab,
    sample_table = sample_data,
    rep_fasta = fasta
  )
  # Label empty rank entries (e.g. "k__") as "Unclassified", at every rank
  rank_prefixes <- c(
    Kingdom = "k", Phylum = "p", Class = "c", Order = "o",
    Family = "f", Genus = "g", Species = "s"
  )
  for (rank in names(rank_prefixes)) {
    is_empty <- grepl(paste0(rank_prefixes[[rank]], "__$"), dataset$tax_table[[rank]])
    dataset$tax_table[[rank]][is_empty] <- paste0(dataset$tax_table[[rank]][is_empty], "Unclassified")
  }
  print(dataset)
  dataset$tidy_dataset()
  print(dataset) # tidy_dataset(): function to trim all the data to make all files consistent
  # Keep only the target kingdom for the requested library; any other value
  # of `libraries` skips the filtering step, as before.
  target_kingdom <- switch(libraries, "16S" = "k__Bacteria", "ITS" = "k__Fungi")
  if (!is.null(target_kingdom)) {
    dataset$tax_table %<>% base::subset(Kingdom == target_kingdom)
    dataset$filter_pollution(taxa = c("Mitochondria", "Chloroplast")) # organelle reads
    dataset$tidy_dataset()
    print(dataset)
    dataset$tidy_dataset() # One more time to remove samples with 0 ASV
    print(dataset)
  }
  # Saving the filtered and corrected abundance table
  saveRDS(dataset$otu_table,
    file = paste0("3_filtered_files/ASV_tab_all_corrected_filt_", libraries, ".RDS")
  )
  # Saving the filtered and corrected taxonomic table
  saveRDS(dataset$tax_table,
    file = paste0("3_filtered_files/ASV_taxo_tab_all_corrected_filt_", libraries, ".RDS")
  )
  # Saving the filtered and corrected fasta file
  writeXStringSet(dataset$rep_fasta,
    format = "fasta",
    file = paste0("3_filtered_files/ASV_seq_filt_", libraries, ".fasta")
  )
}
# Filter both libraries; the "##" lines below are the console output captured
# when the document was rendered (dataset sizes before/after each filter).
filt_taxa("16S")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5821 rows and 147 columns
## tax_table have 5963 rows and 7 columns
## rep_fasta have 5963 sequences
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5821 rows and 147 columns
## tax_table have 5821 rows and 7 columns
## rep_fasta have 5821 sequences
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## rep_fasta have 5601 sequences
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## rep_fasta have 5601 sequences
filt_taxa("ITS")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 7228 rows and 146 columns
## tax_table have 7257 rows and 7 columns
## rep_fasta have 7257 sequences
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 7228 rows and 145 columns
## tax_table have 7228 rows and 7 columns
## rep_fasta have 7228 sequences
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## rep_fasta have 4625 sequences
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## rep_fasta have 4625 sequences
Normalization of abundance tables in CPM
# Chunk setup: clean workspace, move to the analysis directory and create the
# output folder of this step. NOTE(review): hard-coded, machine-specific path.
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation (skipped if it already exists)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "4_normalized_tables" # destination of the CPM-normalized tables
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))
norm.CPM <- function(libraries) {
  # libraries: "16S" (bacterial library) or "ITS" (fungal library).
  # Converts the filtered/corrected counts to counts-per-million (CPM):
  # each sample column is scaled to a total of 1e6, values are truncated
  # to integers (as.integer), and the table is saved as RDS.
  counts <- readRDS(paste0("3_filtered_files/ASV_tab_all_corrected_filt_", libraries, ".RDS"))
  lib_sizes <- colSums(counts) # total reads per sample
  cpm <- t(t(counts) / lib_sizes * 1000000) # scale every column to one million
  normalized <- data.frame(lapply(data.frame(cpm), as.integer)) # truncate to integer CPM
  row.names(normalized) <- row.names(counts) # keep ASV IDs as row names
  # Saving the corrected, filtered and normalized abundance table
  saveRDS(normalized,
    file = paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS")
  )
}
# Normalize both libraries to integer CPM
norm.CPM("16S")
norm.CPM("ITS")
ASV distribution (truth tables)
All data
# Chunk setup: clean workspace, move to the analysis directory and create the
# output folder of this step. NOTE(review): hard-coded, machine-specific path.
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation (skipped if it already exists)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "6_venn_files" # destination of the truth tables and venn ASV lists
if (!file.exists(newfolder)) dir.create(file.path(path, newfolder))
distrib.venn1 <- function(libraries, type) {
# libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
# type = "ASV"
# Purpose: build the species-level truth table (which ASVs occur in Pg, Ps
# and/or bulk soil), save it as csv, plot the per-group ASV counts, save the
# ASV lists of the seven exclusive venn regions as RDS files, and display
# the truth table as an interactive datatable.
# Loading corrected, filtered and normalized abundance table
tab <- readRDS(paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))
# Per-group sample columns (selected by the sample-name patterns)
Pg <- subset(tab, select = grep("_Pg", names(tab)))
Ps <- subset(tab, select = grep("_Ps", names(tab)))
Bs <- subset(tab, select = grep("_SNR", names(tab)))
# Listing the ASVs detected (row sum > 0) in each group
sums_per_row <- rowSums(Pg)
indices <- which(sums_per_row > 0)
Pg_list <- rownames(Pg)[indices] # Pg ASV list
sums_per_row <- rowSums(Ps)
indices <- which(sums_per_row > 0)
Ps_list <- rownames(Ps)[indices] # Ps ASV list
sums_per_row <- rowSums(Bs)
indices <- which(sums_per_row > 0)
Bs_list <- rownames(Bs)[indices] # Bs ASV list
library(venn)
# Editing ASV distribution table with venn package
x <- list(Pg = Pg_list, Ps = Ps_list, Bs = Bs_list)
info_x <- extractInfo(x, what = c("counts", "intersections", "both"), use.names = TRUE)
info_x <- info_x[-1, ] # drop the first row — presumably the empty "000" combination; NOTE(review): confirm venn::extractInfo output layout
info_x <- rbind(info_x, colSums(info_x)) # Add a row with the sums of each column
rownames(info_x)[nrow(info_x)] <- "Total"
write.table(info_x,
file = paste0("6_venn_files/venn_all_", type, "_", libraries, ".csv"),
sep = ";", row.names = TRUE, col.names = NA
) # Saving the table to create venn diagram later
# Counts by group (species / bulk soil)
Species <- c("Pg (Ni-HA)", "Ps (Ni-T)", "Bulk soil")
Counts <- c(
sum(subset(info_x, Pg == 1)$counts),
sum(subset(info_x, Ps == 1)$counts),
sum(subset(info_x, Bs == 1)$counts)
)
counts_tab <- data.frame(Species, Counts)
# Fixing the display order of the groups
counts_tab$Species <- factor(counts_tab$Species,
levels = c(
"Pg (Ni-HA)",
"Ps (Ni-T)",
"Bulk soil"
)
)
# Histogram (horizontal barplot of the number of ASVs per group)
library(ggplot2)
# NOTE(review): vjust is passed inside aes(); it works as a constant but
# would conventionally be a fixed geom_text() parameter.
barplot_counts <- ggplot(counts_tab, aes(x = Counts, y = Species)) +
geom_bar(stat = "identity") +
geom_text(aes(label = Counts, vjust = 0.3)) +
theme(
axis.title.y = element_blank(),
axis.text.y = element_text(size = 14)
) +
xlab("Number of ASVs")
ggsave(paste0("6_venn_files/histo_venn_all_", type, "_", libraries, ".svg"),
plot = barplot_counts,
width = 15,
height = 5,
units = "cm",
dpi = 1200
)
# Deriving the seven exclusive venn regions by set algebra
Pg.Ps.Bs <- Reduce(intersect, list(Pg_list, Ps_list, Bs_list)) # Subset "Pg-Ps-Bs"
Pg_without_Pg.Ps.Bs <- setdiff(Pg_list, Pg.Ps.Bs) # "Pg" + "Pg-Ps" + "Pg-Bs" subsets
Ps_without_Pg.Ps.Bs <- setdiff(Ps_list, Pg.Ps.Bs) # "Ps" + "Pg-Ps" + "Ps-Bs" subsets
Bs_without_Pg.Ps.Bs <- setdiff(Bs_list, Pg.Ps.Bs) # "Bs" + "Pg-Bs" + "Ps-Bs" subsets
Pg.Ps <- Reduce(intersect, list(Pg_without_Pg.Ps.Bs, Ps_without_Pg.Ps.Bs)) # Subset "Pg-Ps"
Pg.Bs <- Reduce(intersect, list(Pg_without_Pg.Ps.Bs, Bs_without_Pg.Ps.Bs)) # Subset "Pg-Bs"
Ps.Bs <- Reduce(intersect, list(Ps_without_Pg.Ps.Bs, Bs_without_Pg.Ps.Bs)) # Subset "Ps-Bs"
Pg <- setdiff(setdiff(setdiff(Pg_list, Pg.Bs), Pg.Ps), Pg.Ps.Bs) # Subset "Pg" (note: Pg/Ps/Bs now hold ASV lists, no longer the data frames above)
Ps <- setdiff(setdiff(setdiff(Ps_list, Ps.Bs), Pg.Ps), Pg.Ps.Bs) # Subset "Ps"
Bs <- setdiff(setdiff(setdiff(Bs_list, Pg.Bs), Ps.Bs), Pg.Ps.Bs) # Subset "Bs"
# Saving list of each subset (consumed by distrib.venn2 and edit.spec.tab)
saveRDS(Pg, file = paste0("6_venn_files/list_ASV_Pg_subset_", libraries, ".RDS"))
saveRDS(Pg.Bs, file = paste0("6_venn_files/list_ASV_Pg-Bs_subset_", libraries, ".RDS"))
saveRDS(Pg.Ps, file = paste0("6_venn_files/list_ASV_Pg-Ps_subset_", libraries, ".RDS"))
saveRDS(Pg.Ps.Bs, file = paste0("6_venn_files/list_ASV_Pg-Ps-Bs_subset_", libraries, ".RDS"))
saveRDS(Ps, file = paste0("6_venn_files/list_ASV_Ps_subset_", libraries, ".RDS"))
saveRDS(Ps.Bs, file = paste0("6_venn_files/list_ASV_Ps-Bs_subset_", libraries, ".RDS"))
saveRDS(Bs, file = paste0("6_venn_files/list_ASV_Bs_subset_", libraries, ".RDS"))
# Display truth table (returned value, rendered in the output document)
library("DT")
datatable(info_x,
width = NULL, height = NULL,
caption = paste0(type, " distribution (", libraries, ")")
)
}
# Truth tables over all samples, for both libraries
distrib.venn1("16S", "ASV")
distrib.venn1("ITS", "ASV")
Specific data
# Chunk setup: clean workspace and working directory (no new output folder —
# this step writes into the existing "6_venn_files").
# NOTE(review): hard-coded, machine-specific path.
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
distrib.venn2 <- function(libraries, type, subset) {
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # type = "ASV"
  # subset = "Pg-Ps", "Pg-Ps-Bs", "Pg", "Ps", "Pg-Bs", "Ps-Bs",
  #   "Pg-spec" ("Pg" + "Pg-Bs" subsets), "Ps-spec" ("Ps" + "Ps-Bs" subsets),
  #   "core" ("Pg-Ps" + "Pg-Ps-Bs" subsets), or
  #   "recruited" ("Ps-Bs" + "Pg-Bs" + "Pg-Ps-Bs" subsets)
  # Purpose: build the per-compartment truth table (venn counts) for the ASVs
  # of the requested subset, save it as csv, plot a barplot of ASV counts per
  # compartment, and return an interactive datatable of the truth table.
  # Refactor notes: the duplicated file.exists()/readRDS() branches and the
  # fragile exists("list1")/exists("list2") checks (exists() also searches
  # enclosing environments, and the variable `list` shadowed base::list) are
  # replaced by a single path-vector lookup with identical net behavior.

  # Composite subsets are unions of the elementary subset files saved by
  # distrib.venn1(); missing files are simply skipped, as before.
  composite_subsets <- list(
    "Pg-spec" = c("Pg", "Pg-Bs"),
    "Ps-spec" = c("Ps", "Ps-Bs"),
    "core" = c("Pg-Ps", "Pg-Ps-Bs"),
    "recruited" = c("Pg-Ps-Bs", "Pg-Bs", "Ps-Bs")
  )
  if (subset %in% names(composite_subsets)) {
    list_paths <- paste0(
      "6_venn_files/list_ASV_", composite_subsets[[subset]],
      "_subset_", libraries, ".RDS"
    )
    list_paths <- list_paths[file.exists(list_paths)]
    if (length(list_paths) == 0) {
      print("No data available.")
      return()
    }
    asv_list <- unlist(lapply(list_paths, readRDS)) # union of the available lists
  } else {
    # Elementary subset: a single saved list
    list_path <- paste0("6_venn_files/list_ASV_", subset, "_subset_", libraries, ".RDS")
    if (!file.exists(list_path)) {
      print(paste0("No ", type, " in ", subset, " subset"))
      return()
    }
    asv_list <- readRDS(list_path)
  }
  # Loading ASV abundance table and restricting it to the requested subset
  abund_tab <- readRDS(paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))
  tab <- subset(abund_tab, rownames(abund_tab) %in% asv_list)
  # One presence list per compartment, selected by its sample-name pattern.
  # As in the original implementation, a compartment whose selection collapses
  # to a vector (exactly one matching column) is dropped from the analysis.
  compartment_patterns <- c(
    Seeds = "ADN_G", Pulps = "ADN_P", Leaves = "ADN_F",
    Roots = "ADN_R", Soil = "ADN_S"
  )
  x <- list()
  for (compartment in names(compartment_patterns)) {
    selected_cols <- grep(compartment_patterns[[compartment]], names(tab), value = TRUE)
    comp_df <- tab[, selected_cols]
    if (is.null(dim(comp_df))) next # single-column selection -> skipped (see note above)
    comp_df <- comp_df[rowSums(comp_df) != 0, ] # Remove ASV with 0 abundance
    x[[compartment]] <- rownames(comp_df)
  }
  # Creating truth table
  library(venn)
  info_x <- extractInfo(x, what = c("counts", "intersections", "both"), use.names = TRUE)
  info_x <- info_x[-1, ] # drop the first (empty-combination) row
  info_x <- rbind(info_x, colSums(info_x)) # add a row with the sums of each column
  rownames(info_x)[nrow(info_x)] <- "Total"
  # Saving venn table
  write.table(info_x,
    file = paste0("6_venn_files/venn_", subset, "_ASV_", libraries, ".csv"),
    sep = ";", row.names = TRUE, col.names = NA
  )
  # Counts by compartments
  # NOTE(review): assumes every compartment column is present in info_x; if a
  # compartment was skipped above, subset() would fail on the missing column.
  Compartment <- c("Seeds", "Pulps", "Leaves", "Roots", "R. soil")
  Counts <- c(
    sum(subset(info_x, Seeds == 1)$counts),
    sum(subset(info_x, Pulps == 1)$counts),
    sum(subset(info_x, Leaves == 1)$counts),
    sum(subset(info_x, Roots == 1)$counts),
    sum(subset(info_x, Soil == 1)$counts)
  )
  counts_tab <- data.frame(Compartment, Counts)
  # Fixing the display order of the compartments
  counts_tab$Compartment <- factor(counts_tab$Compartment,
    levels = c("Seeds", "Pulps", "Leaves", "Roots", "R. soil")
  )
  # Barplot of ASV counts per compartment ("core" is drawn vertically, every
  # other subset horizontally). vjust is now a fixed parameter of geom_text()
  # instead of an aes() mapping (same rendering, correct usage).
  library(ggplot2)
  if (subset == "core") {
    barplot_counts <- ggplot(counts_tab, aes(x = Compartment, y = Counts)) +
      geom_bar(stat = "identity") +
      geom_text(aes(label = Counts), vjust = 0.3) +
      theme(
        axis.title.x = element_blank(),
        axis.text.x = element_text(size = 14)
      ) +
      ylab("Number of ASVs")
  } else {
    barplot_counts <- ggplot(counts_tab, aes(x = Counts, y = Compartment)) +
      geom_bar(stat = "identity") +
      geom_text(aes(label = Counts), vjust = 0.3) +
      theme(
        axis.title.y = element_blank(),
        axis.text.y = element_text(size = 14)
      ) +
      xlab("Number of ASVs")
  }
  ggsave(paste0("6_venn_files/histo_venn_", subset, "_ASV_", libraries, ".svg"),
    plot = barplot_counts,
    width = 15,
    height = 5,
    units = "cm",
    dpi = 1200
  )
  # Display the truth table (returned value, rendered in the output document)
  library("DT")
  datatable(info_x,
    width = NULL, height = NULL,
    caption = paste0(type, " distribution in ", subset, " subset (", libraries, ")")
  )
}
# Truth tables for each composite subset of interest, for both libraries
distrib.venn2("16S", "ASV", "Pg-spec")
distrib.venn2("16S", "ASV", "Ps-spec")
distrib.venn2("ITS", "ASV", "Pg-spec")
distrib.venn2("ITS", "ASV", "Ps-spec")
distrib.venn2("16S", "ASV", "core")
distrib.venn2("ITS", "ASV", "core")
distrib.venn2("16S", "ASV", "recruited")
distrib.venn2("ITS", "ASV", "recruited")
Editing specific ASV abundance tables and taxonomic tables
# Chunk setup: clean workspace, set the analysis directory and create the
# nested output folders "7_editing_figures/ASV_data".
# NOTE(review): the absolute path is machine-specific.
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis"
newfolder <- "7_editing_figures"
if (!dir.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder_1 <- "ASV_data"
# Bug fix: the existence test previously checked for "ASV_data" in the working
# directory, while the folder is created inside "7_editing_figures"; the test
# now targets the actual path being created.
if (!dir.exists(file.path(path, newfolder_1))) dir.create(file.path(path, newfolder_1))
edit.spec.tab <- function(libraries, type, subset) {
# libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
# type = "ASV"
# subset = "Pg-Ps", "Pg-Ps-Bs", "Pg", "Ps", "Pg-Bs", "Ps-Bs", "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets), "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets) or "core" (corresponding to "Pg-Ps" + "Pg-Ps-Bs" subsets)
library(microeco)
library(magrittr)
library(Biostrings)
library(ggplot2)
# Loading sample metadata
sample_data <- read.csv("sample_table.csv",
sep = ";",
header = T, row.names = "Sample_ID"
)
# Loading abundance table and taxonomic table
asv_tab <- readRDS(paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))
taxo_tab <- readRDS(paste0("3_filtered_files/ASV_taxo_tab_all_corrected_filt_", libraries, ".RDS"))
# Creating microtable object
DS <- microtable$new(
otu_table = asv_tab,
tax_table = taxo_tab,
sample_table = sample_data
)
print(DS)
DS$tidy_dataset()
print(DS) # Making all files consistent
# Loading ASV list
library(dplyr)
if (subset == "Pg-spec") {
distrib_path_Pg <- paste0("6_venn_files/list_ASV_Pg_subset_", libraries, ".RDS")
distrib_path_Pg.Bs <- paste0("6_venn_files/list_ASV_Pg-Bs_subset_", libraries, ".RDS")
if (file.exists(distrib_path_Pg) && file.exists(distrib_path_Pg.Bs)) {
distrib_Pg <- readRDS(distrib_path_Pg)
distrib_Pg.Bs <- readRDS(distrib_path_Pg.Bs)
distrib <- c(distrib_Pg, distrib_Pg.Bs)
} else if (file.exists(distrib_path_Pg) && !file.exists(distrib_path_Pg.Bs)) {
distrib <- readRDS(distrib_path_Pg)
} else if (!file.exists(distrib_path_Pg) && file.exists(distrib_path_Pg.Bs)) {
distrib <- readRDS(distrib_path_Pg.Bs)
} else {
print(paste0("No ", type, " in ", subset, " subset (", libraries, ")"))
return()
}
} else if (subset == "Ps-spec") {
distrib_path_Ps <- paste0("6_venn_files/list_ASV_Ps_subset_", libraries, ".RDS")
distrib_path_Ps.Bs <- paste0("6_venn_files/list_ASV_Ps-Bs_subset_", libraries, ".RDS")
if (file.exists(distrib_path_Ps) && file.exists(distrib_path_Ps.Bs)) {
distrib_Ps <- readRDS(distrib_path_Ps)
distrib_Ps.Bs <- readRDS(distrib_path_Ps.Bs)
distrib <- c(distrib_Ps, distrib_Ps.Bs)
} else if (file.exists(distrib_path_Ps) && !file.exists(distrib_path_Ps.Bs)) {
distrib <- readRDS(distrib_path_Ps)
} else if (!file.exists(distrib_path_Ps) && file.exists(distrib_path_Ps.Bs)) {
distrib <- readRDS(distrib_path_Ps.Bs)
} else {
print(paste0("No ", type, " in ", subset, " subset (", libraries, ")"))
return()
}
} else if (subset == "core") {
distrib_path_Pg.Ps <- paste0("6_venn_files/list_ASV_Pg-Ps_subset_", libraries, ".RDS")
distrib_path_Pg.Ps.Bs <- paste0("6_venn_files/list_ASV_Pg-Ps-Bs_subset_", libraries, ".RDS")
if (file.exists(distrib_path_Pg.Ps) && file.exists(distrib_path_Pg.Ps.Bs)) {
distrib_Pg.Ps <- readRDS(distrib_path_Pg.Ps)
distrib_Pg.Ps.Bs <- readRDS(distrib_path_Pg.Ps.Bs)
distrib <- c(distrib_Pg.Ps, distrib_Pg.Ps.Bs)
} else if (file.exists(distrib_path_Pg.Ps) && !file.exists(distrib_path_Pg.Ps.Bs)) {
distrib <- readRDS(distrib_path_Pg.Ps)
} else if (!file.exists(distrib_path_Pg.Ps) && file.exists(distrib_path_Pg.Ps.Bs)) {
distrib <- readRDS(distrib_path_Pg.Ps.Bs)
} else {
print(paste0("No ", type, " in ", subset, " subset (", libraries, ")"))
return()
}
}
tab <- subset(DS$otu_table, rownames(DS$otu_table) %in% distrib)
# Making all files consistent
DS2 <- microtable$new(
otu_table = tab,
tax_table = DS$tax_table,
sample_table = DS$sample_table
)
print(DS2)
DS2$tidy_dataset()
print(DS2)
abund_tab <- DS2$otu_table
tab <- DS2$tax_table
# Saving abundance table and taxonomic table for each subset
saveRDS(abund_tab, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_", libraries, ".RDS"))
saveRDS(tab, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_", libraries, ".RDS"))
# Subtable creations
Leaves <- "ADN_F"
selected_cols <- grep(Leaves, names(abund_tab), value = TRUE)
leaves_tab <- data.frame(abund_tab[, selected_cols, drop = FALSE])
if (ncol(leaves_tab) == 0) {
print("No available data for leaves compartment")
} else {
DS_leaves <- microtable$new(
otu_table = leaves_tab,
tax_table = DS2$tax_table
)
print(DS_leaves)
DS_leaves$tidy_dataset()
print(DS_leaves)
saveRDS(DS_leaves$otu_table, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_leaves_", libraries, ".RDS"))
saveRDS(DS_leaves$tax_table, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_leaves_", libraries, ".RDS"))
}
Pulps <- "ADN_P"
selected_cols <- grep(Pulps, names(abund_tab), value = TRUE)
pulps_tab <- data.frame(abund_tab[, selected_cols, drop = FALSE])
if (ncol(pulps_tab) == 0) {
print("No available data for pulps compartment")
} else {
DS_pulps <- microtable$new(
otu_table = pulps_tab,
tax_table = DS2$tax_table
)
print(DS_pulps)
DS_pulps$tidy_dataset()
print(DS_pulps)
saveRDS(DS_pulps$otu_table, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_pulps_", libraries, ".RDS"))
saveRDS(DS_pulps$tax_table, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_pulps_", libraries, ".RDS"))
}
Seeds <- "ADN_G"
selected_cols <- grep(Seeds, names(abund_tab), value = TRUE)
seeds_tab <- data.frame(abund_tab[, selected_cols, drop = FALSE])
if (ncol(seeds_tab) == 0) {
print("No available data for seeds compartment")
} else {
DS_seeds <- microtable$new(
otu_table = seeds_tab,
tax_table = DS2$tax_table
)
print(DS_seeds)
DS_seeds$tidy_dataset()
print(DS_seeds)
saveRDS(DS_seeds$otu_table, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_seeds_", libraries, ".RDS"))
saveRDS(DS_seeds$tax_table, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_seeds_", libraries, ".RDS"))
}
Roots <- "ADN_R"
selected_cols <- grep(Roots, names(abund_tab), value = TRUE)
roots_tab <- data.frame(abund_tab[, selected_cols, drop = FALSE])
if (ncol(roots_tab) == 0) {
print("No available data for roots compartment")
} else {
DS_roots <- microtable$new(
otu_table = roots_tab,
tax_table = DS2$tax_table
)
print(DS_roots)
DS_roots$tidy_dataset()
print(DS_roots)
saveRDS(DS_roots$otu_table, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_roots_", libraries, ".RDS"))
saveRDS(DS_roots$tax_table, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_roots_", libraries, ".RDS"))
}
Soil <- "ADN_S"
selected_cols <- grep(Soil, names(abund_tab), value = TRUE)
soil_tab <- data.frame(abund_tab[, selected_cols, drop = FALSE])
if (ncol(soil_tab) == 0) {
print("No available data for soil compartment")
} else {
DS_soil <- microtable$new(
otu_table = soil_tab,
tax_table = DS2$tax_table
)
print(DS_soil)
DS_soil$tidy_dataset()
print(DS_soil)
saveRDS(DS_soil$otu_table, file = paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_soil_", libraries, ".RDS"))
saveRDS(DS_soil$tax_table, file = paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_soil_", libraries, ".RDS"))
}
}
# Export abundance/taxonomy subset tables for the "Pg-spec" ASV subset of the 16S library
edit.spec.tab("16S", "ASV", "Pg-spec")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 705 rows and 67 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 67 rows and 7 columns
## otu_table have 705 rows and 67 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 9 rows and 8 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 9 rows and 8 columns
## tax_table have 9 rows and 7 columns
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 9 rows and 10 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 9 rows and 10 columns
## tax_table have 9 rows and 7 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 1 rows and 1 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 1 rows and 1 columns
## tax_table have 1 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 375 rows and 16 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 375 rows and 16 columns
## tax_table have 375 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 471 rows and 32 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 471 rows and 32 columns
## tax_table have 471 rows and 7 columns
# Export abundance/taxonomy subset tables for the "Ps-spec" ASV subset of the 16S library
edit.spec.tab("16S", "ASV", "Ps-spec")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 959 rows and 64 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 64 rows and 7 columns
## otu_table have 959 rows and 64 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 1 rows and 1 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 1 rows and 1 columns
## tax_table have 1 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 24 rows and 8 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 24 rows and 8 columns
## tax_table have 24 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 11 rows and 8 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 11 rows and 8 columns
## tax_table have 11 rows and 7 columns
## microtable-class object:
## sample_table have 15 rows and 2 columns
## otu_table have 307 rows and 15 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 15 rows and 2 columns
## otu_table have 307 rows and 15 columns
## tax_table have 307 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 822 rows and 32 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 822 rows and 32 columns
## tax_table have 822 rows and 7 columns
# Export abundance/taxonomy subset tables for the "Pg-spec" ASV subset of the ITS library
edit.spec.tab("ITS", "ASV", "Pg-spec")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 880 rows and 83 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 83 rows and 7 columns
## otu_table have 880 rows and 83 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 91 rows and 16 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 91 rows and 16 columns
## tax_table have 91 rows and 7 columns
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 64 rows and 10 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 64 rows and 10 columns
## tax_table have 64 rows and 7 columns
## microtable-class object:
## sample_table have 9 rows and 2 columns
## otu_table have 27 rows and 9 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 9 rows and 2 columns
## otu_table have 27 rows and 9 columns
## tax_table have 27 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 241 rows and 16 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 241 rows and 16 columns
## tax_table have 241 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 680 rows and 32 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 680 rows and 32 columns
## tax_table have 680 rows and 7 columns
# Export abundance/taxonomy subset tables for the "Ps-spec" ASV subset of the ITS library
edit.spec.tab("ITS", "ASV", "Ps-spec")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 1514 rows and 78 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 78 rows and 7 columns
## otu_table have 1514 rows and 78 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 475 rows and 16 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 475 rows and 16 columns
## tax_table have 475 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 72 rows and 8 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 72 rows and 8 columns
## tax_table have 72 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 13 rows and 8 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 13 rows and 8 columns
## tax_table have 13 rows and 7 columns
## microtable-class object:
## sample_table have 14 rows and 2 columns
## otu_table have 314 rows and 14 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 14 rows and 2 columns
## otu_table have 314 rows and 14 columns
## tax_table have 314 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 938 rows and 32 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 938 rows and 32 columns
## tax_table have 938 rows and 7 columns
Alpha diversity
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "alpha_diversity"
# Check the SAME path that dir.create() targets: the original code tested
# file.exists(newfolder) relative to the working directory (biostat_analysis)
# while creating the folder under `path` (7_editing_figures), so the check and
# the creation target disagreed. dir.exists() is also the correct test for a directory.
if (!dir.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))
a.div.index <- function(libraries, p.adj) {
  # Compute alpha diversity indices (Observed ASVs, Shannon, Inverse Simpson)
  # for one amplicon library, test group differences (Shapiro-Wilk normality,
  # Kruskal-Wallis, pairwise Wilcoxon with compact letter display), export the
  # statistics tables, and save a combined Cleveland-plot figure as PDF.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # p.adj = "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr" or "none"
  library(vegan)
  # Loading abundance tables and sample metadata
  asv_tab <- readRDS(file = paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))
  asv_tab <- as.data.frame(t(asv_tab)) # samples in rows, as vegan::diversity() expects
  sample_data <- read.csv("sample_table.csv",
    sep = ";",
    header = TRUE, row.names = "Sample_ID"
  )
  sample_data <- sample_data[, c(
    "Sample",
    "Species_Compartment",
    "Compartment_2"
  )]
  # Calculation of diversity indices
  # Sample size (N) per species/compartment group, for the final summary table
  N_effectif <- data.frame(
    Species_Compartment = names(table(sample_data$Species_Compartment)),
    N = as.vector(table(sample_data$Species_Compartment))
  )
  colnames(N_effectif)[colnames(N_effectif) == "Species_Compartment"] <- "Species and compartment"
  N_effectif$`Species and compartment` <- gsub("_", " ", N_effectif$`Species and compartment`)
  Observed <- data.frame(Sample = rownames(asv_tab), Observed = rowSums(asv_tab > 0)) # Number of observed ASVs
  Shannon <- data.frame(Sample = rownames(asv_tab), Shannon = diversity(asv_tab, index = "shannon")) # Shannon index
  Invsimpson <- data.frame(Sample = rownames(asv_tab), Invsimpson = diversity(asv_tab, index = "invsimpson")) # Invsimpson index
  library(dplyr)
  library(purrr)
  list_df <- list(Observed, Shannon, Invsimpson, sample_data)
  alpha_div <- reduce(list_df, full_join, by = "Sample") # table with all calculated indices
  # Checking data normality (Shapiro-Wilk)
  print(shapiro.test(alpha_div$Observed))
  print(shapiro.test(alpha_div$Shannon))
  print(shapiro.test(alpha_div$Invsimpson))
  # Kruskal-Wallis test (global difference among species/compartment groups)
  print(kruskal.test(alpha_div$Observed ~ alpha_div$Species_Compartment))
  print(kruskal.test(alpha_div$Shannon ~ alpha_div$Species_Compartment))
  print(kruskal.test(alpha_div$Invsimpson ~ alpha_div$Species_Compartment))
  library(rcompanion)
  library(multcompView)
  library(tidyverse)
  library(ggpubr)
  library(ggplot2)
  library(rstatix)
  library(FSA)
  library(knitr)
  library(kableExtra)
  # Computes statistics (Wilcoxon Test)
  # Observed
  stat.observed <- alpha_div %>%
    wilcox_test(Observed ~ Species_Compartment, p.adjust.method = p.adj) %>%
    add_significance()
  stat.observed$Comparison <- paste(stat.observed$group1, stat.observed$group2, sep = "-")
  write.table(stat.observed,
    file = paste0("7_editing_figures/alpha_diversity/stat.observed_", libraries, ".txt"),
    sep = ";", row.names = FALSE
  )
  mean_tab <- setNames(aggregate(Observed ~ Species_Compartment, data = alpha_div, mean), c("Species and compartment", "mean")) # Calculating mean
  SD <- setNames(aggregate(Observed ~ Species_Compartment, data = alpha_div, sd), c("Species and compartment", "SD")) # Calculating standard deviation
  stat.observed <- stat.observed[order(stat.observed$p.adj, decreasing = FALSE), ] # Sort by ascending adjusted p-value (required before cldList)
  CLD <- cldList(p.adj ~ Comparison, data = stat.observed) # Determination of Tukey letters
  colnames(CLD)[colnames(CLD) == "Group"] <- "Species and compartment"
  merge_tab <- merge(merge(mean_tab, SD, by = "Species and compartment", all = TRUE), CLD, by = "Species and compartment", all = TRUE)
  merge_tab$mean <- round(merge_tab$mean, 0)
  merge_tab$SD <- round(merge_tab$SD, 0)
  merge_tab$Observed <- paste0(merge_tab$mean, " ± ", merge_tab$SD, " ", merge_tab$Letter)
  merge_tab$Compartment_2 <- ifelse(grepl("Leaves|Pulps|Seeds", merge_tab$`Species and compartment`), "Aerial compartments", "Underground compartments")
  merge_tab$Species <- ifelse(grepl("Pg", merge_tab$`Species and compartment`),
    "Pg",
    ifelse(grepl("Ps", merge_tab$`Species and compartment`),
      "Ps", "Bs"
    )
  )
  merge_tab$`Species and compartment` <- gsub("_", " ", merge_tab$`Species and compartment`)
  # Choose the font
  library(extrafont)
  #font_import(paths = "C:/Windows/Fonts", prompt = FALSE) # Import available fonts (do this only once)
  loadfonts(device = "win") # For Windows
  loadfonts(device = "pdf") # For export PDF graphics
  # Plotting cleveland diagram to vizualise the data distribution
  rhizo_tab <- subset(merge_tab, Compartment_2 == "Underground compartments")
  rhizo_tab$`Species and compartment` <- factor(rhizo_tab$`Species and compartment`,
    levels = c(
      "Bulk Soil",
      "Ps Soil",
      "Pg Soil",
      "Ps Roots",
      "Pg Roots"
    )
  )
  aerial_tab <- subset(merge_tab, Compartment_2 == "Aerial compartments")
  aerial_tab$`Species and compartment` <- factor(aerial_tab$`Species and compartment`,
    levels = c(
      "Ps Leaves",
      "Pg Leaves",
      "Ps Pulps",
      "Pg Pulps",
      "Ps Seeds",
      "Pg Seeds"
    )
  )
  rhizo_obs <- ggplot(rhizo_tab) +
    aes(
      x = mean,
      y = `Species and compartment`,
      shape = Species
    ) +
    geom_point(stat = "identity", aes(color = Species), size = 1.5) +
    theme(
      text = element_text(family = "serif", size = 9.5),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    xlab("Observed index") +
    geom_errorbar(aes(xmin = mean - SD, xmax = mean + SD, color = Species),
      width = .1,
      position = position_dodge(0.05)
    ) +
    scale_color_manual(values = c(Bs = "#ED7F10", Pg = "#008E8E", Ps = "#85C17E")) +
    scale_shape_manual(values = c(Bs = 17, Pg = 19, Ps = 15))
  aerial_obs <- ggplot(aerial_tab) +
    aes(
      x = mean,
      y = `Species and compartment`,
      shape = Species
    ) +
    geom_point(stat = "identity", aes(color = Species), size = 1.5) +
    theme(
      text = element_text(family = "serif", size = 9.5),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    xlab("Observed index") +
    geom_errorbar(aes(xmin = mean - SD, xmax = mean + SD, color = Species),
      width = .1,
      position = position_dodge(0.05)
    ) +
    scale_color_manual(values = c(Bs = "#ED7F10", Pg = "#008E8E", Ps = "#85C17E")) +
    scale_shape_manual(values = c(Bs = 17, Pg = 19, Ps = 15))
  # Keep only the formatted "mean ± SD letter" column for the summary table
  Observed <- merge_tab[, c("Species and compartment", "Observed")]
  # Shannon
  stat.shannon <- alpha_div %>%
    wilcox_test(Shannon ~ Species_Compartment, p.adjust.method = p.adj) %>%
    add_significance()
  stat.shannon$Comparison <- paste(stat.shannon$group1, stat.shannon$group2, sep = "-")
  write.table(stat.shannon,
    file = paste0("7_editing_figures/alpha_diversity/stat.shannon_", libraries, ".txt"),
    sep = ";", row.names = FALSE
  )
  mean_tab <- setNames(aggregate(Shannon ~ Species_Compartment, data = alpha_div, mean), c("Species and compartment", "mean")) # Calculating mean
  SD <- setNames(aggregate(Shannon ~ Species_Compartment, data = alpha_div, sd), c("Species and compartment", "SD")) # Calculating standard deviation
  stat.shannon <- stat.shannon[order(stat.shannon$p.adj, decreasing = FALSE), ] # Sort by ascending adjusted p-value (required before cldList)
  CLD <- cldList(p.adj ~ Comparison, data = stat.shannon) # Determination of Tukey letters
  colnames(CLD)[colnames(CLD) == "Group"] <- "Species and compartment"
  merge_tab <- merge(merge(mean_tab, SD, by = "Species and compartment", all = TRUE), CLD, by = "Species and compartment", all = TRUE)
  merge_tab$mean <- round(merge_tab$mean, 2)
  merge_tab$SD <- round(merge_tab$SD, 2)
  merge_tab$Shannon <- paste0(merge_tab$mean, " ± ", merge_tab$SD, " ", merge_tab$Letter)
  merge_tab$Species <- ifelse(grepl("Pg", merge_tab$`Species and compartment`),
    "Pg",
    ifelse(grepl("Ps", merge_tab$`Species and compartment`),
      "Ps", "Bs"
    )
  )
  merge_tab$`Species and compartment` <- gsub("_", " ", merge_tab$`Species and compartment`)
  # Plotting cleveland diagram to vizualise the data distribution
  merge_tab$`Species and compartment` <- factor(merge_tab$`Species and compartment`,
    levels = c(
      "Bulk Soil",
      "Ps Soil",
      "Pg Soil",
      "Ps Roots",
      "Pg Roots",
      "Ps Leaves",
      "Pg Leaves",
      "Ps Pulps",
      "Pg Pulps",
      "Ps Seeds",
      "Pg Seeds"
    )
  )
  distrib_sha <- ggplot(merge_tab) +
    aes(
      x = mean,
      y = `Species and compartment`,
      shape = Species
    ) +
    geom_point(stat = "identity", aes(color = Species), size = 1.5) +
    theme(
      text = element_text(family = "serif", size = 9.5),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    xlab("Shannon index") +
    geom_errorbar(aes(xmin = mean - SD, xmax = mean + SD, color = Species),
      width = .1,
      position = position_dodge(0.05)
    ) +
    scale_color_manual(values = c(Bs = "#ED7F10", Pg = "#008E8E", Ps = "#85C17E")) +
    scale_shape_manual(values = c(Bs = 17, Pg = 19, Ps = 15))
  Shannon <- merge_tab[, c("Species and compartment", "Shannon")]
  # Invsimpson
  stat.invsimpson <- alpha_div %>%
    wilcox_test(Invsimpson ~ Species_Compartment, p.adjust.method = p.adj) %>%
    add_significance()
  stat.invsimpson$Comparison <- paste(stat.invsimpson$group1, stat.invsimpson$group2, sep = "-")
  write.table(stat.invsimpson,
    file = paste0("7_editing_figures/alpha_diversity/stat.invsimpson_", libraries, ".txt"),
    sep = ";", row.names = FALSE
  )
  mean_tab <- setNames(aggregate(Invsimpson ~ Species_Compartment, data = alpha_div, mean), c("Species and compartment", "mean")) # Calculating mean
  SD <- setNames(aggregate(Invsimpson ~ Species_Compartment, data = alpha_div, sd), c("Species and compartment", "SD")) # Calculating standard deviation
  stat.invsimpson <- stat.invsimpson[order(stat.invsimpson$p.adj, decreasing = FALSE), ] # Sort by ascending adjusted p-value (required before cldList)
  CLD <- cldList(p.adj ~ Comparison, data = stat.invsimpson) # Determination of Tukey letters
  colnames(CLD)[colnames(CLD) == "Group"] <- "Species and compartment"
  merge_tab <- merge(merge(mean_tab, SD, by = "Species and compartment", all = TRUE), CLD, by = "Species and compartment", all = TRUE)
  merge_tab$mean <- round(merge_tab$mean, 2)
  merge_tab$SD <- round(merge_tab$SD, 2)
  merge_tab$Invsimpson <- paste0(merge_tab$mean, " ± ", merge_tab$SD, " ", merge_tab$Letter)
  merge_tab$Species <- ifelse(grepl("Pg", merge_tab$`Species and compartment`),
    "Pg",
    ifelse(grepl("Ps", merge_tab$`Species and compartment`),
      "Ps", "Bs"
    )
  )
  merge_tab$`Species and compartment` <- gsub("_", " ", merge_tab$`Species and compartment`)
  # Plotting cleveland diagram to vizualise the data distribution
  merge_tab$`Species and compartment` <- factor(merge_tab$`Species and compartment`,
    levels = c(
      "Bulk Soil",
      "Ps Soil",
      "Pg Soil",
      "Ps Roots",
      "Pg Roots",
      "Ps Leaves",
      "Pg Leaves",
      "Ps Pulps",
      "Pg Pulps",
      "Ps Seeds",
      "Pg Seeds"
    )
  )
  distrib_invsi <- ggplot(merge_tab) +
    aes(
      x = mean,
      y = `Species and compartment`,
      shape = Species
    ) +
    geom_point(stat = "identity", aes(color = Species), size = 1.5) +
    theme(
      text = element_text(family = "serif", size = 9.5),
      axis.title.y = element_blank(),
      axis.text.y = element_blank() # y labels already shown on the Shannon panel to its left
    ) +
    xlab("Invsimpson index") +
    geom_errorbar(aes(xmin = mean - SD, xmax = mean + SD, color = Species),
      width = .1,
      position = position_dodge(0.05)
    ) +
    scale_color_manual(values = c(Bs = "#ED7F10", Pg = "#008E8E", Ps = "#85C17E")) +
    scale_shape_manual(values = c(Bs = 17, Pg = 19, Ps = 15))
  Invsimpson <- merge_tab[, c("Species and compartment", "Invsimpson")]
  # Editing final table with all results
  list_df <- list(N_effectif, Observed, Shannon, Invsimpson)
  alpha_vf <- reduce(list_df, full_join, by = "Species and compartment")
  # Row order for the exported summary table (renamed from `order` to avoid
  # shadowing base::order)
  group_order <- c("Pg Seeds", "Ps Seeds", "Pg Pulps", "Ps Pulps", "Pg Leaves", "Ps Leaves", "Pg Roots", "Ps Roots", "Pg Soil", "Ps Soil", "Bulk Soil")
  alpha_vf <- alpha_vf[match(group_order, alpha_vf$`Species and compartment`), ]
  write.table(alpha_vf, file = paste0("7_editing_figures/alpha_diversity/alpha_div_", libraries, ".txt"), sep = ";", row.names = FALSE)
  # Display table of results
  library("DT")
  if (libraries == "16S") {
    title <- "Alpha diversity indices (bacterial communities)"
  } else {
    title <- "Alpha diversity indices (fungal communities)"
  }
  print(datatable(alpha_vf, width = NULL, height = NULL, rownames = FALSE, caption = title))
  # Editing final plot
  index_1 <- ggarrange(aerial_obs, rhizo_obs,
    nrow = 2, ncol = 1
  )
  index_2 <- ggarrange(index_1, distrib_sha, distrib_invsi,
    nrow = 1, ncol = 3
  )
  if (libraries == "16S") {
    title <- "(a) Distribution of alpha diversity indices (bacterial communities)"
  } else {
    title <- "(b) Distribution of alpha diversity indices (fungal communities)"
  }
  # annotate_figure(index_2, top = text_grob(title, color = "black", face = "bold", size = 12))
  # Save figure. Pass index_2 explicitly: the original used plot = last_plot(),
  # which returns the most recently registered ggplot, not the assembled
  # ggarrange panel (the annotate_figure() call that would have printed it is
  # commented out), so the wrong plot could be written to the PDF.
  ggsave(paste0("7_editing_figures/alpha_diversity/fig_obs_sha_invsi_", libraries, ".pdf"),
    plot = index_2,
    width = 15,
    height = 7.5,
    units = "cm",
    dpi = 800
  )
  # annotate_figure(index_2, top = text_grob(title, color = "black", face = "bold", size = 12))
}
# Alpha diversity for the 16S library, FDR-adjusted pairwise Wilcoxon tests
a.div.index("16S", "fdr")
##
## Shapiro-Wilk normality test
##
## data: alpha_div$Observed
## W = 0.85926, p-value = 8.052e-10
##
##
## Shapiro-Wilk normality test
##
## data: alpha_div$Shannon
## W = 0.82451, p-value = 3.296e-11
##
##
## Shapiro-Wilk normality test
##
## data: alpha_div$Invsimpson
## W = 0.69735, p-value = 4.37e-15
##
##
## Kruskal-Wallis rank sum test
##
## data: alpha_div$Observed by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 115.24, df = 10, p-value < 2.2e-16
##
##
## Kruskal-Wallis rank sum test
##
## data: alpha_div$Shannon by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 123.49, df = 10, p-value < 2.2e-16
##
##
## Kruskal-Wallis rank sum test
##
## data: alpha_div$Invsimpson by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 126.31, df = 10, p-value < 2.2e-16
# Alpha diversity for the ITS library, FDR-adjusted pairwise Wilcoxon tests
a.div.index("ITS", "fdr")
##
## Shapiro-Wilk normality test
##
## data: alpha_div$Observed
## W = 0.92619, p-value = 7.91e-07
##
##
## Shapiro-Wilk normality test
##
## data: alpha_div$Shannon
## W = 0.89834, p-value = 1.645e-08
##
##
## Shapiro-Wilk normality test
##
## data: alpha_div$Invsimpson
## W = 0.76572, p-value = 6.102e-14
##
##
## Kruskal-Wallis rank sum test
##
## data: alpha_div$Observed by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 139.8, df = 10, p-value < 2.2e-16
##
##
## Kruskal-Wallis rank sum test
##
## data: alpha_div$Shannon by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 136.04, df = 10, p-value < 2.2e-16
##
##
## Kruskal-Wallis rank sum test
##
## data: alpha_div$Invsimpson by alpha_div$Species_Compartment
## Kruskal-Wallis chi-squared = 134.64, df = 10, p-value < 2.2e-16
Beta diversity
MDS
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "beta_diversity"
# Check the SAME path that dir.create() targets: the original code tested
# file.exists(newfolder) relative to the working directory while creating the
# folder under `path`, so check and creation target disagreed.
if (!dir.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))
# Choice of colours (one per species/compartment group, order matters in plot_ly below)
c1 <- "#24445C" # Seeds Pg
c2 <- "#800080" # Pulps Pg
c3 <- "#85C17E" # Leaves Pg
c4 <- "#DAB30A" # Roots Pg
c5 <- "#5B3C11" # Soil Pg
c6 <- "#8EA2C6" # Seeds Ps
c7 <- "#C71585" # Pulps Ps
c8 <- "#B0F2B6" # Leaves Ps
c9 <- "#E7F00D" # Roots Ps
c10 <- "#8B6C42" # Soil Ps
c11 <- "black" # Bulk Soil
all_MDS <- function(libraries, method, nk) {
  # Build an interactive 3D MDS (classical multidimensional scaling / PCoA)
  # plot of all samples for one amplicon library, coloured by
  # species/compartment group, and export the dissimilarity matrix as CSV.
  #
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # method = "morisita", "manhattan", "euclidean", "canberra", "clark", "bray", "kulczynski", "jaccard", "gower", "altGower", "horn", "mountford", "raup", "binomial", "chao", "cao", "mahalanobis", "chisq", "chord", "hellinger", "aitchison", or "robust.aitchison"
  # nk = maximum dimension of the space which the data are to be represented in;
  #      must be >= 3 because axes 1-3 are always extracted for the 3D plot
  stopifnot(nk >= 3)
  library(microeco)
  library(magrittr)
  library(Biostrings)
  library(vegan)
  library(plotly)
  library(webshot)
  library(magick)
  # Loading abundance table and sample metadata
  asv_tab <- readRDS(paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))
  sample_data <- read.csv("sample_table.csv",
    sep = ";",
    header = TRUE, row.names = "Sample_ID"
  )
  DS <- microtable$new(
    otu_table = asv_tab,
    sample_table = sample_data
  )
  print(DS)
  DS$tidy_dataset() # drop samples/ASVs absent from either table
  print(DS)
  sample_info <- DS$sample_table
  sample_info$Species_Compartment <- gsub("_", " ", sample_info$Species_Compartment)
  asvTab <- data.frame(t(DS$otu_table)) # samples in rows, as vegdist() expects
  # Calculating the distance matrix (renamed from matrix_morisita: the
  # dissimilarity used is whatever `method` says, not necessarily Morisita)
  dist_matrix <- as.matrix(vegdist(asvTab, method = method))
  write.table(dist_matrix,
    file = paste0("7_editing_figures/beta_diversity/matrix_", method, "_", libraries, ".csv"),
    sep = ";",
    col.names = NA
  )
  # MDS
  prin_comp <- cmdscale(dist_matrix, k = nk, eig = TRUE)
  # Calculate the proportion of variance explained by each component
  explained_variance_ratio <- prin_comp$eig / sum(prin_comp$eig) * 100
  explained_variance_ratio <- round(explained_variance_ratio, 1)
  # Creation of the dataframe with the principal components for the final figure
  components <- data.frame(
    Axis1 = prin_comp$points[, 1],
    Axis2 = prin_comp$points[, 2],
    Axis3 = prin_comp$points[, 3]
  )
  # Adding information to the "components" dataframe
  components$Species_Compartment <- sample_info$Species_Compartment
  # Reversing the axes (sign of MDS axes is arbitrary; flipped for presentation)
  components$Axis1 <- -components$Axis1
  components$Axis2 <- -components$Axis2
  components$Axis3 <- -components$Axis3
  # Create an interactive 3D point cloud plot; factor levels fix the legend
  # order and must match the c1..c11 colour order defined at top level
  components$Species_Compartment <- factor(components$Species_Compartment,
    levels = c(
      "Pg Seeds", "Pg Pulps", "Pg Leaves", "Pg Roots", "Pg Soil",
      "Ps Seeds", "Ps Pulps", "Ps Leaves", "Ps Roots", "Ps Soil",
      "Bulk Soil"
    )
  )
  if (libraries == "16S") {
    title <- "(a) MDS analysis of bacterial samples"
  } else if (libraries == "ITS") {
    title <- "(b) MDS analysis of fungal samples"
  } else {
    # Fail early with a clear message; the original left `title` undefined here,
    # producing an obscure "object 'title' not found" error in layout() below
    stop("libraries must be \"16S\" or \"ITS\"", call. = FALSE)
  }
  fig_species_compartment <- plot_ly(components,
    x = ~Axis1,
    y = ~Axis2, z = ~Axis3,
    color = ~Species_Compartment,
    colors = c(c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11),
    type = "scatter3d", mode = "markers",
    marker = list(size = 7)
  ) %>%
    layout(
      title = title,
      scene = list(
        xaxis = list(title = paste("Axis 1 (", toString(explained_variance_ratio[1]), "%)", sep = ""), titlefont = list(size = 15), tickfont = list(size = 12)),
        yaxis = list(title = paste("Axis 2 (", toString(explained_variance_ratio[2]), "%)", sep = ""), titlefont = list(size = 15), tickfont = list(size = 12)),
        zaxis = list(title = paste("Axis 3 (", toString(explained_variance_ratio[3]), "%)", sep = ""), titlefont = list(size = 15), tickfont = list(size = 12))
      ),
      legend = list(title = list(text = "Species and compartment"))
    )
  # Return the plotly object (auto-printed when called at top level)
  fig_species_compartment
}
all_MDS("16S", "morisita", 3)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
all_MDS("ITS", "morisita", 3)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
Hierarchical Clustering (HC)
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "beta_diversity"
# Fix: test the full target path. The bare folder name is resolved against the
# working directory (the parent of `path`), so the old check could wrongly skip
# or wrongly repeat the dir.create() call. This matches the pattern used later
# in this script for the co-occurrence folders.
if (!file.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))
# Hierarchical clustering of samples from a normalized ASV abundance table.
# Steps: (1) compute a vegan distance matrix, (2) hclust + reorder the tree,
# (3) save a "merge levels" diagnostic plot (PNG: number of clusters k vs
# node height h), (4) save an annotated dendrogram (PDF) with k cluster
# rectangles, hand-placed cluster labels and a red cut-height line.
# Side effects only: writes two files under 7_editing_figures/beta_diversity/.
dend.cah <- function(libraries, method, distance, clusters) {
# libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
# method = "morisita", "manhattan", "euclidean", "canberra", "clark", "bray", "kulczynski", "jaccard", "gower", "altGower", "horn", "mountford", "raup", "binomial", "chao", "cao", "mahalanobis", "chisq", "chord", "hellinger", "aitchison", or "robust.aitchison"
# distance = "ward.D2", "ward.D", "single", "complete", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC)
# clusters = number of clusters
# NOTE(review): despite its name, `distance` is the agglomeration method
# passed as the second argument of hclust(); the dissimilarity index is
# selected by `method`.
library(ade4)
library(vegan)
library(gclus)
library(cluster)
library(RColorBrewer)
library(MASS)
library(extrafont)
loadfonts(device = "pdf")
# Loading abundance table (ASVs in rows on disk; transposed so samples are rows)
asv_tab <- readRDS(paste0("4_normalized_tables/ASV_tab_all.sp_all.asv_", libraries, ".RDS"))
asv_tab <- t(asv_tab)
asv_tab <- as.data.frame(asv_tab)
# Distance matrix
matrix.d <- vegdist(asv_tab, method = method)
# Classification
matrix.cw <- hclust(matrix.d, distance)
# Group reorganisation
matrix.cwo <- reorder.hclust(matrix.cw, matrix.d)
# Merging levels graph
if (libraries == "16S") {
title <- "(a) Merge fusion (bacterial samples)"
} else {
title <- "(b) Merge fusion (fungal samples)"
}
png(filename = paste0("7_editing_figures/beta_diversity/merge_levels_", libraries,".png"),
width = 15, height = 15,
units = "cm", res = 600)
# Sorted merge heights vs the number of clusters remaining at each merge
plot(sort(matrix.cw$height), nrow(asv_tab):2,
type="S",
main= title,
ylab="k (number of clusters)",
xlab="h (node height)", col="grey",
cex.main = 1,
cex.axis = 0.75)
text(sort(matrix.cw $height), nrow(asv_tab):2, nrow(asv_tab):2,
col="red", cex=0.8)
dev.off()
# Plotting the dendrogram and k classes
# if (libraries == "16S") {
# title = "Dendrogram of Hierarchical Clustering of Samples Based on ASV Composition (bacterial samples)"
# } else {
# title = "Dendrogram of Hierarchical Clustering of Samples Based on ASV Composition (fungal samples)"
# }
pdf(
file = paste0("7_editing_figures/beta_diversity/dendrogram_", method, "_", clusters, "_", libraries, ".pdf"),
width = 8.8, height = 6.6
)
# Part 1: drawing the dendrogram and rectangles
plot(matrix.cwo,
hang = -1, cex = 0.25, cex.axis = 0.75,
ylab = "Distance", xlab = paste(nrow(asv_tab), " samples"),
main = NULL,
sub = paste(clusters, " clusters (k)")
)
rect.hclust(matrix.cwo, k = clusters, border = 1:clusters)
# Part 2: adding labels to rectangles
# NOTE(review): all x/y label positions below are hand-tuned for the
# published figure (clusters = 8 for 16S, clusters = 10 for ITS); they must
# be re-tuned if `clusters` or the input data changes.
if (libraries == "16S") {
text(
x = 17, y = -0.7,
labels = "k1_B", pos = 3, font = 2, cex = 0.5
)
text(
x = 49, y = -0.7,
labels = "k2_B", pos = 3, font = 2, cex = 0.5
)
text(
x = 72, y = -0.7,
labels = "k3_B", pos = 3, font = 2, cex = 0.5
)
text(
x = 88, y = -0.7,
labels = "k4_B", pos = 3, font = 2, cex = 0.5
)
text(
x = 100.5, y = -0.7,
labels = "k5_B", pos = 3, font = 2, cex = 0.5
)
text(
x = 109, y = -0.7,
labels = "k6_B", pos = 3, font = 2, cex = 0.5
)
text(
x = 114, y = -0.7,
labels = "k7_B", pos = 3, font = 2, cex = 0.25
)
text(
x = 123.5, y = -0.7,
labels = "k8_B", pos = 3, font = 2, cex = 0.5
)
# Part 3: add the horizontal line and its label
# The cut line sits at the third-highest merge height — presumably the
# height separating the top three clusters; confirm if k changes.
abline(h = matrix.cwo$height[length(matrix.cwo$height) - 2], lty = 2, col = "red", lwd = 1)
text(x = 1.8, y = round(matrix.cwo$height[length(matrix.cwo$height) - 2], 0)-0.05, labels = paste0("distance = ", round(matrix.cwo$height[length(matrix.cwo$height) - 2], 2)), pos = 3, col = "red", cex = 0.5)
} else if (libraries == "ITS") {
text(
x = 8, y = -0.64,
labels = "k1_F", pos = 3, font = 2, cex = 0.5
)
text(
x = 25, y = -0.64,
labels = "k2_F", pos = 3, font = 2, cex = 0.5
)
text(
x = 41, y = -0.64,
labels = "k3_F", pos = 3, font = 2, cex = 0.5
)
text(
x = 56, y = -0.64,
labels = "k4_F", pos = 3, font = 2, cex = 0.5
)
text(
x = 71, y = -0.64,
labels = "k5_F", pos = 3, font = 2, cex = 0.5
)
text(
x = 82, y = -0.64,
labels = "k6_F", pos = 3, font = 2, cex = 0.5
)
text(
x = 90, y = -0.64,
labels = "k7_F", pos = 3, font = 2, cex = 0.5
)
text(
x = 102.5, y = -0.64,
labels = "k8_F", pos = 3, font = 2, cex = 0.5
)
text(
x = 120, y = -0.64,
labels = "k9_F", pos = 3, font = 2, cex = 0.5
)
text(
x = 137.5, y = -0.64,
labels = "k10_F", pos = 3, font = 2, cex = 0.5
)
# Part 3: add the horizontal line and its label
abline(h = matrix.cwo$height[length(matrix.cwo$height) - 2], lty = 2, col = "red", lwd = 1)
text(x = 1.3, y = round(matrix.cwo$height[length(matrix.cwo$height) - 2], 0)-0.05, labels = paste0("distance = ", round(matrix.cwo$height[length(matrix.cwo$height) - 2], 1)), pos = 3, col = "red", cex = 0.5)
}
dev.off()
}
dend.cah("16S", "morisita", "ward.D2", 8)
## png
## 2
dend.cah("ITS", "morisita", "ward.D2", 10)
## png
## 2
Relative abundances (barplot)
Figures
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "barplots"
# Fix: test the full target path — the bare folder name would be resolved
# against the working directory (the parent of `path`), not against `path`.
if (!file.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))
# Build a faceted relative-abundance barplot (microeco::trans_abund) at a
# given taxonomic level, save the per-sample and group-mean abundance tables,
# and save the figure as a PDF. Returns the ggplot object.
barplot.fig <- function(libraries, subset, type, level, nb) {
# libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
# subset = "all.sp_all.asv"
# type = "ASV"
# NOTE(review): `type` is not used anywhere in this body; kept for
# call-site symmetry with the other analysis functions.
# level = "Phylum", "Class", "Family", "Order", Genus" or "Species"
# nb = number of taxa in legend
library(microeco)
library(magrittr)
library(Biostrings)
library(ggplot2)
# Loading tables
abund_tab <- readRDS(paste0("4_normalized_tables/ASV_tab_", subset, "_", libraries, ".RDS"))
tab <- readRDS(paste0("3_filtered_files/ASV_taxo_tab_all_corrected_filt_", libraries, ".RDS"))
sample_data <- read.csv("sample_table.csv",
sep = ";",
header = T, row.names = "Sample_ID"
)
DS <- microtable$new(
otu_table = abund_tab,
tax_table = tab,
sample_table = sample_data
)
print(DS)
# tidy_dataset() drops samples/taxa absent from one of the three tables
DS$tidy_dataset()
print(DS)
# Calculating relative abundance
# t1: group means per Species_Compartment (used for the plot and stats table)
t1 <- trans_abund$new(
dataset = DS,
taxrank = level,
ntaxa = nb,
groupmean = "Species_Compartment"
)
# t2: raw per-sample abundances (consumed later by get.stats())
t2 <- trans_abund$new(
dataset = DS,
taxrank = level,
ntaxa = nb
)
# NOTE(review): "row_data_abund" is presumably a typo for "raw"; kept
# unchanged because get.stats() reads back this exact filename.
saveRDS(t2$data_abund, file = paste0("7_editing_figures/barplots/row_data_abund_", subset, "_subset_", level, "_", libraries, ".RDS"))
# Saving data abundance table
write.table(t1$data_abund, file = paste0("7_editing_figures/barplots/mean_sd_abund_", subset, "_subset_", level, "_", libraries, ".txt"), sep = "\t", col.names = NA)
library(dplyr)
# Reorganising columns: derive Species and Compartment facets from the
# group labels stored in the Sample column
t1$data_abund <- t1$data_abund %>%
mutate(Species = ifelse(grepl("Pg", Sample), "Pg",
ifelse(grepl("Ps", Sample), "Ps", "Bs")
))
t1$data_abund <- t1$data_abund %>%
mutate(Compartment = ifelse(grepl("Leaves", Sample), "Leaves",
ifelse(grepl("Pulps", Sample), "Pulps",
ifelse(grepl("Seeds", Sample), "Seeds",
ifelse(grepl("Roots", Sample), "Roots", "Soil")
)
)
))
# Fix the facet order (seed -> soil gradient)
t1$data_abund$Compartment <- factor(t1$data_abund$Compartment, levels = c("Seeds", "Pulps", "Leaves", "Roots", "Soil"))
# Plotting relative abundances
fig <- t1$plot_bar(
others_color = "grey90",
facet = c("Compartment", "Species"),
barwidth = 3,
xtext_keep = FALSE,
strip_text = 12,
legend_text_italic = TRUE,
ytitle_size = 12
) +
theme(
text = element_text(family = "serif", size = 12),
axis.text = element_text(size = 10),
legend.key.height = unit(0.75, "lines"),
legend.key.width = unit(0.75, "lines"),
legend.position = "right",
plot.margin = margin(t = 0)
) +
guides(fill = guide_legend(title = level, ncol = 1))
ggsave(paste0("7_editing_figures/barplots/plot_", subset, "_", level, "_subset_", libraries, ".pdf"),
plot = fig,
device = "pdf",
width = 170,
height = 113,
units = "mm",
dpi = 1200
)
fig
}
barplot.fig("16S", "all.sp_all.asv", "ASV", "Phylum", 10)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
barplot.fig("ITS", "all.sp_all.asv", "ASV", "Phylum", 10)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
barplot.fig("16S", "all.sp_all.asv", "ASV", "Class", 20)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
barplot.fig("ITS", "all.sp_all.asv", "ASV", "Class", 20)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
barplot.fig("16S", "all.sp_all.asv", "ASV", "Genus", 20)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
## microtable-class object:
## sample_table have 131 rows and 7 columns
## otu_table have 5601 rows and 131 columns
## tax_table have 5601 rows and 7 columns
barplot.fig("ITS", "all.sp_all.asv", "ASV", "Genus", 20)
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
## microtable-class object:
## sample_table have 145 rows and 7 columns
## otu_table have 4625 rows and 145 columns
## tax_table have 4625 rows and 7 columns
Statistics
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Get stats for relative abundances
# (no output folder is created here: get.stats() writes into the
# 7_editing_figures/barplots folder created in the previous section)
# Run pairwise Wilcoxon tests (FDR-adjusted) on the per-sample relative
# abundances exported by barplot.fig(), and save the result table as a
# tab-separated file. Side effects only (reads/writes files); returns NULL
# when the data pass the Shapiro normality test.
get.stats <- function(libraries, type, subset, level) {
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # type = "ASV" (not used in the body; kept for call-site compatibility)
  # subset = "all.sp_all.asv"
  # level = "Phylum", "Class", "Family", "Order", Genus" or "Species"
  library(vegan)
  library(rstatix)
  abund_tab <- readRDS(paste0("7_editing_figures/barplots/row_data_abund_", subset, "_subset_", level, "_", libraries, ".RDS"))
  # Remove taxa with an abundance of 0
  abund <- abund_tab[abund_tab$Abundance > 0, ]
  # One ID per taxon x group combination
  abund$ID <- paste0(abund$Taxonomy, "_", abund$Species_Compartment)
  # Remove "taxa" with only 1 or 2 replicates to perform the statistical tests.
  # Vectorized with table() instead of growing a vector with c() in a loop.
  id_counts <- table(abund$ID)
  data_keep <- names(id_counts)[id_counts >= 3]
  abund_filt <- abund[abund$ID %in% data_keep, ]
  # Check the normality of data
  test_norm <- shapiro.test(abund_filt$Abundance)
  # Get statistics: non-normal data -> pairwise Wilcoxon with FDR correction
  if (test_norm$p.value <= 0.05) {
    stats_abund <- abund_filt %>%
      wilcox_test(Abundance ~ ID, p.adjust.method = "fdr") %>%
      add_significance()
  } else {
    print("Data follows the normality, do an ANOVA.")
    return()
  }
  # Saving table
  write.table(stats_abund,
    file = paste0("7_editing_figures/barplots/stats_abund_", subset, "_subset_", level, "_", libraries, ".txt"),
    sep = "\t",
    col.names = NA
  )
}
get.stats("16S", "ASV", "all.sp_all.asv", "Class")
get.stats("ITS", "ASV", "all.sp_all.asv", "Class")
Biomarkers (LEfSe analysis)
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "LEfSe"
# Fix: test the full target path — the bare folder name would be resolved
# against the working directory (the parent of `path`), not against `path`.
if (!file.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))
# Run a LEfSe biomarker analysis (microeco::trans_diff) on one subset,
# grouped by Compartment, save the differential-abundance table (RDS + txt)
# and display it as an interactive datatable. Gracefully skips the analysis
# when input files are missing, the taxonomy table is degenerate, no feature
# is significant, or all observations fall in one group.
lefse.analysis <- function(libraries, subset, type, level) {
  # libraries = "16S" (for bacterial library) or "ITS" (for fungal library)
  # subset = "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets) or "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets)
  # type = "ASV"
  # level = "Phylum", "Class", "Family", "Order", Genus" or "Species"
  library(microeco)
  library(magrittr)
  library(Biostrings)
  library(ggplot2)
  tryCatch(
    {
      # Loading tables
      path <- paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_", libraries, ".RDS")
      if (file.exists(path)) {
        abund_tab <- readRDS(path)
      } else {
        print("No available data. Skip the LEfSe analysis.")
        return()
      }
      tab <- readRDS(paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_", libraries, ".RDS"))
      if (length(tab) == 1) {
        # Fixed typo in the message ("th" -> "the")
        print("Not enough data. Skip the LEfSe analysis.")
        return()
      }
      sample_data <- read.csv("sample_table.csv",
        sep = ";",
        header = T, row.names = "Sample_ID"
      )
      DS <- microtable$new(
        otu_table = abund_tab,
        tax_table = tab,
        sample_table = sample_data
      )
      print(DS)
      DS$tidy_dataset()
      print(DS)
      # LEfSe analysis
      t1 <- trans_diff$new(
        dataset = DS,
        method = "lefse",
        group = "Compartment",
        alpha = 0.05,
        lefse_subgroup = NULL,
        taxa_level = level # paste0() on a single string was a no-op
      )
      # Saving LEfSe results
      saveRDS(t1$res_diff, file = paste0("7_editing_figures/LEfSe/lefse_", subset, "_subset_", level, "_", libraries, ".RDS"))
      write.table(t1$res_diff,
        file = paste0("7_editing_figures/LEfSe/lefse_", subset, "_subset_", level, "_", libraries, ".txt"),
        sep = "\t", col.names = NA
      )
      # Display LEfSe results
      library(DT)
      print(datatable(t1$res_diff,
        width = NULL, height = NULL, rownames = FALSE,
        caption = paste0(libraries, " ", subset, " ", type, " ", level)
      ))
    },
    error = function(e) {
      # Only the two expected trans_diff failure modes are swallowed;
      # anything else is re-raised.
      if (inherits(e, "error") && grepl("No significant feature found!", e$message)) {
        cat("No significant taxa. Skip the LEfSe analysis.\n")
        return()
      } else if (inherits(e, "error") && grepl("all observations are in the same group", e$message)) {
        cat("All observations are in the same group. Skip the LEfSe analysis.\n")
        return()
      } else {
        stop(e)
      }
    }
  )
}
lefse.analysis("16S", "Pg-spec", "ASV", "Genus")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 705 rows and 67 columns
## tax_table have 705 rows and 7 columns
## microtable-class object:
## sample_table have 67 rows and 7 columns
## otu_table have 705 rows and 67 columns
## tax_table have 705 rows and 7 columns
## No taxa_abund list found. Calculate it with cal_abund function ...
## The result is stored in object$taxa_abund ...
## 198 input features ...
## 198 features are remained after removing unknown features ...
## Start Kruskal-Wallis rank sum test for Compartment ...
## 65 taxa found significant ...
## After P value adjustment, 47 taxa found significant ...
## Minimum LDA score: 3.99588516519995 maximum LDA score: 5.72372200421366
## Taxa abundance table is stored in object$res_abund ...
## lefse analysis result is stored in object$res_diff ...
lefse.analysis("16S", "Ps-spec", "ASV", "Genus")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 959 rows and 64 columns
## tax_table have 959 rows and 7 columns
## microtable-class object:
## sample_table have 64 rows and 7 columns
## otu_table have 959 rows and 64 columns
## tax_table have 959 rows and 7 columns
## No taxa_abund list found. Calculate it with cal_abund function ...
## The result is stored in object$taxa_abund ...
## 240 input features ...
## 240 features are remained after removing unknown features ...
## Start Kruskal-Wallis rank sum test for Compartment ...
## 92 taxa found significant ...
## After P value adjustment, 76 taxa found significant ...
## Minimum LDA score: 3.64565560661862 maximum LDA score: 5.65051237769098
## Taxa abundance table is stored in object$res_abund ...
## lefse analysis result is stored in object$res_diff ...
lefse.analysis("ITS", "Pg-spec", "ASV", "Genus")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 880 rows and 83 columns
## tax_table have 880 rows and 7 columns
## microtable-class object:
## sample_table have 83 rows and 7 columns
## otu_table have 880 rows and 83 columns
## tax_table have 880 rows and 7 columns
## No taxa_abund list found. Calculate it with cal_abund function ...
## The result is stored in object$taxa_abund ...
## 307 input features ...
## 307 features are remained after removing unknown features ...
## Start Kruskal-Wallis rank sum test for Compartment ...
## 94 taxa found significant ...
## After P value adjustment, 81 taxa found significant ...
## Minimum LDA score: 3.09885276866572 maximum LDA score: 5.64114530747946
## Taxa abundance table is stored in object$res_abund ...
## lefse analysis result is stored in object$res_diff ...
lefse.analysis("ITS", "Ps-spec", "ASV", "Genus")
## microtable-class object:
## sample_table have 147 rows and 7 columns
## otu_table have 1514 rows and 78 columns
## tax_table have 1514 rows and 7 columns
## microtable-class object:
## sample_table have 78 rows and 7 columns
## otu_table have 1514 rows and 78 columns
## tax_table have 1514 rows and 7 columns
## No taxa_abund list found. Calculate it with cal_abund function ...
## The result is stored in object$taxa_abund ...
## 434 input features ...
## 434 features are remained after removing unknown features ...
## Start Kruskal-Wallis rank sum test for Compartment ...
## 163 taxa found significant ...
## After P value adjustment, 139 taxa found significant ...
## Minimum LDA score: 2.76808901753798 maximum LDA score: 5.64156415348689
## Taxa abundance table is stored in object$res_abund ...
## lefse analysis result is stored in object$res_diff ...
Co-occurrence networks
This part of the script is an adaptation of the script by Cheng Gao used in the following article published in Nature Communications in 2022: Gao, Cheng, et al. « Co-Occurrence Networks Reveal More Complexity than Community Composition in Resistance and Resilience of Microbial Communities ». Nature Communications, vol. 13, no 1, July 2022, p. 3867. DOI.org (Crossref), https://doi.org/10.1038/s41467-022-31343-y.
Creation of BF (Bacteria and Fungi) subtables
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures"
newfolder <- "co-occurence_network"
# Fix: test the full target path (the bare name would be resolved against the
# working directory, not `path`), consistent with the BF_tables check below.
if (!file.exists(file.path(path, newfolder))) dir.create(file.path(path, newfolder))
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures/co-occurence_network"
newfolder <- "BF_tables"
if (!file.exists(file.path(path, newfolder))) {
  dir.create(file.path(path, newfolder))
}
# Build combined Bacteria+Fungi (BF) abundance and taxonomy tables for one
# subset x compartment: keep only the samples shared by the 16S and ITS
# libraries, make both tables consistent with microeco::microtable, and save
# them under co-occurence_network/BF_tables/. Side effects only.
BF.tab <- function(subset, compartment, type) {
  # subset = "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets) or "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets)
  # compartment = "seeds", "pulps", "leaves", "roots" or "soil"
  # type = "ASV"
  library(dplyr)
  # Creating BF abundance tables.
  # Fix: test the files directly instead of exists("tab_16S")/exists("tab_ITS").
  # The old exists()-based checks made the second ("&&") branch unreachable
  # and, worse, let the taxo-table stage silently reuse the stale abundance
  # variables when a taxo RDS file was missing.
  path_16S <- paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_", compartment, "_16S.RDS")
  path_ITS <- paste0("7_editing_figures/", type, "_data/abund_tab_", subset, "_subset_", compartment, "_ITS.RDS")
  if (!file.exists(path_16S) || !file.exists(path_ITS)) {
    print("One of the dataframe does not exist. Skip the co-occurence network analysis.")
    return()
  }
  tab_16S <- readRDS(file = path_16S)
  tab_ITS <- readRDS(file = path_ITS)
  # Keep only samples in common
  tab_16S <- dplyr::select(tab_16S, intersect(names(tab_16S), names(tab_ITS)))
  tab_ITS <- dplyr::select(tab_ITS, intersect(names(tab_16S), names(tab_ITS)))
  BF_abund_tab <- bind_rows(tab_16S, tab_ITS)
  if (ncol(BF_abund_tab) == 0) {
    print("No samples in common. Skip the co-occurence network analysis.")
    return()
  }
  # Creating BF taxonomy tables (same direct file checks as above)
  path_16S <- paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_", compartment, "_16S.RDS")
  path_ITS <- paste0("7_editing_figures/", type, "_data/taxo_tab_", subset, "_subset_", compartment, "_ITS.RDS")
  if (!file.exists(path_16S) || !file.exists(path_ITS)) {
    print("One of the dataframe does not exist. Skip the co-occurence network analysis.")
    return()
  }
  tab_16S <- readRDS(file = path_16S)
  tab_ITS <- readRDS(file = path_ITS)
  BF_tab <- bind_rows(tab_16S, tab_ITS)
  BF_tab$ASV_ID <- row.names(BF_tab)
  # Making all files consistent: tidy_dataset() trims the taxonomy table to
  # the ASVs actually present in the merged abundance table
  library(microeco)
  DS <- microtable$new(
    otu_table = BF_abund_tab,
    tax_table = BF_tab
  )
  print(DS)
  DS$tidy_dataset()
  print(DS)
  # Saving tables (abundance table transposed: samples in rows)
  BF_abund <- data.frame(t(DS$otu_table))
  saveRDS(BF_abund, file = paste0("7_editing_figures/co-occurence_network/BF_tables/abund_tab_", subset, "_subset_", compartment, "_BF.RDS"))
  saveRDS(DS$tax_table, file = paste0("7_editing_figures/co-occurence_network/BF_tables/taxo_tab_", subset, "_subset_", compartment, "_BF.RDS"))
}
BF.tab("Pg-spec", "leaves", "ASV")
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 63 rows and 8 columns
## tax_table have 100 rows and 8 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 63 rows and 8 columns
## tax_table have 63 rows and 8 columns
BF.tab("Pg-spec", "pulps", "ASV")
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 73 rows and 10 columns
## tax_table have 73 rows and 8 columns
## microtable-class object:
## sample_table have 10 rows and 2 columns
## otu_table have 73 rows and 10 columns
## tax_table have 73 rows and 8 columns
BF.tab("Pg-spec", "seeds", "ASV")
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 5 rows and 1 columns
## tax_table have 28 rows and 8 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 5 rows and 1 columns
## tax_table have 5 rows and 8 columns
BF.tab("Pg-spec", "roots", "ASV")
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 616 rows and 16 columns
## tax_table have 616 rows and 8 columns
## microtable-class object:
## sample_table have 16 rows and 2 columns
## otu_table have 616 rows and 16 columns
## tax_table have 616 rows and 8 columns
BF.tab("Pg-spec", "soil", "ASV")
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 1151 rows and 32 columns
## tax_table have 1151 rows and 8 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 1151 rows and 32 columns
## tax_table have 1151 rows and 8 columns
BF.tab("Ps-spec", "leaves", "ASV")
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 48 rows and 1 columns
## tax_table have 476 rows and 8 columns
## microtable-class object:
## sample_table have 1 rows and 2 columns
## otu_table have 48 rows and 1 columns
## tax_table have 48 rows and 8 columns
BF.tab("Ps-spec", "pulps", "ASV")
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 96 rows and 8 columns
## tax_table have 96 rows and 8 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 96 rows and 8 columns
## tax_table have 96 rows and 8 columns
BF.tab("Ps-spec", "seeds", "ASV")
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 24 rows and 8 columns
## tax_table have 24 rows and 8 columns
## microtable-class object:
## sample_table have 8 rows and 2 columns
## otu_table have 24 rows and 8 columns
## tax_table have 24 rows and 8 columns
BF.tab("Ps-spec", "roots", "ASV")
## microtable-class object:
## sample_table have 14 rows and 2 columns
## otu_table have 611 rows and 14 columns
## tax_table have 621 rows and 8 columns
## microtable-class object:
## sample_table have 14 rows and 2 columns
## otu_table have 611 rows and 14 columns
## tax_table have 611 rows and 8 columns
BF.tab("Ps-spec", "soil", "ASV")
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 1760 rows and 32 columns
## tax_table have 1760 rows and 8 columns
## microtable-class object:
## sample_table have 32 rows and 2 columns
## otu_table have 1760 rows and 32 columns
## tax_table have 1760 rows and 8 columns
Spearman correlation test
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation (full target path is checked, which is the correct
# pattern — the check and the dir.create() target the same location)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures/co-occurence_network"
newfolder <- "spearman_correlation"
if (!file.exists(file.path(path, newfolder))) {
dir.create(file.path(path, newfolder))
}
# Pairwise Spearman correlations (psych::corr.test, FDR-adjusted) on a
# combined Bacteria+Fungi abundance table; the corr.test result object is
# saved as an RDS for the downstream significance-filtering step.
SpMan <- function(subset, compartment, type) {
  # subset = "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets) or "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets)
  # compartment = "seeds", "pulps", "leaves", "roots" or "soil"
  # type = "ASV"
  library(vegan)
  library(psych)
  # Loading BF abundance table; bail out early when it is missing
  in_file <- paste0("7_editing_figures/co-occurence_network/BF_tables/abund_tab_", subset, "_subset_", compartment, "_BF.RDS")
  if (!file.exists(in_file)) {
    print("No data available. Skip the co-occurence network analysis.")
    return()
  }
  abund <- readRDS(in_file)
  # A single sample cannot yield correlations
  if (nrow(abund) <= 1) {
    print("Not enough data to proceed the following steps.")
    return()
  }
  # Correlation test
  cor_res <- corr.test(abund, use = "pairwise", method = "spearman", adjust = "fdr", alpha = .05, ci = FALSE)
  # Saving results
  saveRDS(cor_res, paste0("7_editing_figures/co-occurence_network/spearman_correlation/SpMan_", subset, "_subset_", compartment, "_BF.RDS"))
}
SpMan("Pg-spec", "soil", "ASV")
SpMan("Pg-spec", "roots", "ASV")
SpMan("Pg-spec", "leaves", "ASV")
SpMan("Pg-spec", "pulps", "ASV")
SpMan("Pg-spec", "seeds", "ASV")
## [1] "Not enough data to proceed the following steps."
## NULL
SpMan("Ps-spec", "soil", "ASV")
SpMan("Ps-spec", "roots", "ASV")
SpMan("Ps-spec", "leaves", "ASV")
## [1] "Not enough data to proceed the following steps."
## NULL
SpMan("Ps-spec", "pulps", "ASV")
SpMan("Ps-spec", "seeds", "ASV")
Significant positive correlations
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures/co-occurence_network"
newfolder <- "SpMan_Rsig"
if (!file.exists(file.path(path, newfolder))) {
dir.create(file.path(path, newfolder))
}
# Global thresholds read by SpMan.Rsig() below
r.cutoff <- 0.6 # Spearman correlation coefficient (rho) threshold — note: this is rho, not R²
p.cutoff <- 0.05 # FDR-adjusted p-value threshold
# Extract significant positive Spearman correlations (Cor > r.cutoff and
# p < p.cutoff, both read from the global environment), annotate both
# members of each correlated pair with their taxonomy, and save the edge
# table used to build the co-occurrence network.
SpMan.Rsig <- function(subset, compartment, type) {
  # subset = "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets) or "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets)
  # compartment = "seeds", "pulps", "leaves", "roots" or "soil"
  # type = "ASV" (not used in the body; kept for call-site compatibility)
  # Loading corr.test results
  path <- paste0("7_editing_figures/co-occurence_network/spearman_correlation/SpMan_", subset, "_subset_", compartment, "_BF.RDS")
  if (file.exists(path)) {
    df <- readRDS(path)
  } else {
    print("No data available. Skip the co-occurence network analysis.")
    return()
  }
  Cor <- as.matrix(df$r)
  # Keep the upper triangular part of the correlation matrix (each pair once)
  Cor.df <- data.frame(
    row = rownames(Cor)[row(Cor)[upper.tri(Cor)]],
    col = colnames(Cor)[col(Cor)[upper.tri(Cor)]],
    Cor = Cor[upper.tri(Cor)]
  )
  P0 <- as.matrix(df$p)
  P.df <- data.frame(
    row = rownames(P0)[row(P0)[upper.tri(P0)]],
    col = colnames(P0)[col(P0)[upper.tri(P0)]],
    p = P0[upper.tri(P0)]
  )
  df2 <- data.frame(Cor.df, P.df, Subset = subset, Compartment = compartment, Cross = "BF")
  # Keep data according to p-value and correlation thresholds
  # (the unused duplicate binding `df.sig` was removed)
  da.tmp <- df2[df2$Cor > r.cutoff & df2$p < p.cutoff, ]
  V1 <- data.frame("v1" = da.tmp$row)
  V2 <- data.frame("v2" = da.tmp$col)
  # Loading taxonomy table
  ID.tmp <- readRDS(paste0("7_editing_figures/co-occurence_network/BF_tables/taxo_tab_", subset, "_subset_", compartment, "_BF.RDS"))
  IDsub1 <- ID.tmp[ID.tmp$ASV_ID %in% V1$v1, ]
  IDsub2 <- ID.tmp[ID.tmp$ASV_ID %in% V2$v2, ]
  # seq_len() is safe when zero correlations pass the thresholds; the
  # previous `1:nrow(V1)` would yield c(1, 0) and error on a 0-row frame
  V1$id <- seq_len(nrow(V1))
  V2$id <- seq_len(nrow(V2))
  # merge() does not preserve row order, so restore it through the id column
  M1 <- merge(V1, IDsub1, by.x = "v1", by.y = "ASV_ID", all.x = T)
  M1 <- M1[order(M1$id), ]
  M2 <- merge(V2, IDsub2, by.x = "v2", by.y = "ASV_ID", all.x = T)
  M2 <- M2[order(M2$id), ]
  df.tmp <- data.frame(da.tmp, M1, M2)
  # Saving the significant correlations
  saveRDS(df.tmp, paste0("7_editing_figures/co-occurence_network/SpMan_Rsig/SpMan_Rsig_", subset, "_subset_", compartment, "_BF.RDS"))
}
SpMan.Rsig("Pg-spec", "soil", "ASV")
SpMan.Rsig("Pg-spec", "roots", "ASV")
SpMan.Rsig("Pg-spec", "leaves", "ASV")
SpMan.Rsig("Pg-spec", "pulps", "ASV")
SpMan.Rsig("Pg-spec", "seeds", "ASV")
## [1] "No data available. Skip the co-occurence network analysis."
## NULL
SpMan.Rsig("Ps-spec", "soil", "ASV")
SpMan.Rsig("Ps-spec", "roots", "ASV")
SpMan.Rsig("Ps-spec", "leaves", "ASV")
## [1] "No data available. Skip the co-occurence network analysis."
## NULL
SpMan.Rsig("Ps-spec", "pulps", "ASV")
SpMan.Rsig("Ps-spec", "seeds", "ASV")
Plotting co-occurrence network Bacteria x Fungi (ASV)
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Out folder creation: SpMan_network plus its three sub-folders used by the
# network-plotting function (graph properties, per-module details, figures)
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures/co-occurence_network"
newfolder <- "SpMan_network"
if (!file.exists(file.path(path, newfolder))) {
dir.create(file.path(path, newfolder))
}
path <- "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis/7_editing_figures/co-occurence_network/SpMan_network"
newfolder_1 <- "property"
newfolder_2 <- "module_details"
newfolder_3 <- "figures"
if (!file.exists(file.path(path, newfolder_1))) {
dir.create(file.path(path, newfolder_1))
}
if (!file.exists(file.path(path, newfolder_2))) {
dir.create(file.path(path, newfolder_2))
}
if (!file.exists(file.path(path, newfolder_3))) {
dir.create(file.path(path, newfolder_3))
}
bac.fung.cross.net <- function(subset, compartment, type, edges) {
# Build, characterise and plot the bacteria x fungi co-occurrence network for
# one subset/compartment combination, starting from the significant Spearman
# correlations previously saved by SpMan.Rsig().
# Side effects: saves a one-row network-property table (.RDS), an .svg figure,
# and a per-module taxonomy table (.txt); also draws the graph in the active
# device. Returns NULL early when either input file is missing.
# subset = "Pg-spec" (corresponding to "Pg" + "Pg-Bs" subsets) or "Ps-spec" (corresponding to "Ps" + "Ps-Bs" subsets)
# compartment = "seeds", "pulps", "leaves", "roots" or "soil"
# type = "ASV"
# edges = "B" (links between bacterial ASVs), "F" (links between fungal ASVs), "BF" (links between bacterial and fungal ASVs) or "all" (all links)
# NOTE(review): `type` is documented but never used in the body; the output
# file names do not include it — confirm whether it should.
library(igraph)
library(graphics)
# Loading data
# Taxonomy table (ASV_ID + Kingdom...Species) written by the BF-tables step
path <- paste0("7_editing_figures/co-occurence_network/BF_tables/taxo_tab_", subset, "_subset_", compartment, "_BF.RDS")
if (file.exists(path)) {
ID.tmp <- readRDS(path)
} else {
print("No data available. Skip the co-occurence network analysis.")
return()
}
# Edge list of significant Spearman correlations written by SpMan.Rsig()
path <- paste0("7_editing_figures/co-occurence_network/SpMan_Rsig/SpMan_Rsig_", subset, "_subset_", compartment, "_BF.RDS")
if (file.exists(path)) {
da <- readRDS(path)
} else {
print("Not enough data. Skip the co-occurence network analysis.")
return()
}
# Creating graph
# Undirected graph; the first two columns of `da` are taken as the edge list
g <- graph.data.frame(da, directed = FALSE)
# Color edition
g.color <- droplevels(ID.tmp[ID.tmp$ASV_ID %in% V(g)$name, ]) # Colour filtering from ID.tmp
g.color <- g.color[match(V(g)$name, g.color$ASV_ID), ] # Align rows with vertex order
# Map Kingdom levels to node colours (first level -> black, second -> red).
# NOTE(review): factor(..., labels = ...) assumes exactly two Kingdom levels
# remain after droplevels(); with a single level this call errors. The
# following levels<- assignment repeats the labels argument and looks
# redundant — confirm intent.
g.color$Kingdom <- factor(g.color$Kingdom, labels = c("black", "#D13C32"))
levels(g.color$Kingdom) <- c("black", "#D13C32")
V(g)$color <- as.character(g.color$Kingdom) # Assigning colours to graph nodes
# Global network properties (igraph)
num.edges <- length(E(g))
num.vertices <- length(V(g))
connectance <- edge_density(g, loops = FALSE)
average.degree <- mean(igraph::degree(g))
average.path.length <- average.path.length(g) # local name shadows the igraph function of the same name
diameter <- diameter(g, directed = FALSE, unconnected = TRUE, weights = NULL)
edge.connectivity <- edge_connectivity(g)
clustering.coefficient <- transitivity(g)
no.clusters <- no.clusters(g) # number of connected components
centralization.betweenness <- centralization.betweenness(g)$centralization
centralization.degree <- centralization.degree(g)$centralization
# Community detection by fast-greedy modularity optimisation
fun.fc <- cluster_fast_greedy(g)
Modularity <- modularity(fun.fc, membership(fun.fc))
No.modules <- nrow(data.frame(sizes(fun.fc)))
# Summary table of graph properties
df.tmp <- data.frame(
network = "BF-FF-BB", cross = "Bacteria - Fungi", subset, compartment,
num.edges, num.vertices, connectance, average.degree,
average.path.length, diameter, edge.connectivity,
clustering.coefficient, no.clusters,
centralization.betweenness, centralization.degree,
Modularity, No.modules
)
# Saving the summary table
saveRDS(df.tmp, paste0("7_editing_figures/co-occurence_network/SpMan_network/property/SpMan_network_property_BF-BB-FF_", subset, "_subset_", compartment, "_BF.RDS"))
# Edge colouring: look up the Kingdom of both endpoints of every edge
g.E <- data.frame(get.edgelist(g))
names(g.E) <- c("V1", "V2")
V1 <- data.frame("v1" = g.E$V1)
V2 <- data.frame("v2" = g.E$V2)
ID.tmpx <- ID.tmp[, c("ASV_ID", "Kingdom")]
IDsub1 <- ID.tmpx[ID.tmpx$ASV_ID %in% V1$v1, ]
IDsub2 <- ID.tmpx[ID.tmpx$ASV_ID %in% V2$v2, ]
# `id` records the original edge order so it can be restored after merge()
# (merge() reorders rows)
V1$id <- 1:nrow(V1)
V2$id <- 1:nrow(V2)
M1 <- merge(V1, IDsub1, by.x = "v1", by.y = "ASV_ID", all.x = T)
M1 <- M1[order(M1$id), ]
M2 <- merge(V2, IDsub2, by.x = "v2", by.y = "ASV_ID", all.x = T)
M2 <- M2[order(M2$id), ]
# Edge colour depends on the Kingdoms of the two endpoints and on `edges`.
# NOTE(review): for "B"/"F"/"BF", edges not matching the condition keep an
# NA colour, so only the selected link type is coloured in the plot.
if (edges == "B") {
M1$BF[M1$Kingdom == "k__Bacteria" & M2$Kingdom == "k__Bacteria"] <- "salmon1"
} else if (edges == "F") {
M1$BF[M1$Kingdom == "k__Fungi" & M2$Kingdom == "k__Fungi"] <- "paleturquoise3"
} else if (edges == "BF") {
M1$BF[M1$Kingdom == "k__Bacteria" & M2$Kingdom == "k__Fungi"] <- "grey"
} else if (edges == "all") {
M1$BF <- "#8FB08A" # default colour for mixed bacteria-fungi links
M1$BF[M1$Kingdom == "k__Bacteria" & M2$Kingdom == "k__Bacteria"] <- "black"
M1$BF[M1$Kingdom == "k__Fungi" & M2$Kingdom == "k__Fungi"] <- "#FF7F7F"
}
E(g)$color <- as.character(M1$BF)
# Plotting graph
set.seed(123) # fixed seed so the force-directed layout is reproducible
svg(paste0("7_editing_figures/co-occurence_network/SpMan_network/figures/", subset, "_subset_", compartment, "_", edges, ".svg"), width = 5, height = 5)
plot(g, edge.width = 1.5, vertex.frame.color = NA, vertex.label = NA, edge.lty = 1.5, edge.curved = T, vertex.size = 5, vertex.shape = "circle")
dev.off()
# Second, slightly smaller plot drawn in the active device (the notebook)
plot(g, edge.width = 1.5, vertex.frame.color = NA, vertex.label = NA, edge.lty = 1.5, edge.curved = T, vertex.size = 3.5, vertex.shape = "circle")
# Gathering information on modules
mod_list <- cluster_fast_greedy(g) # List of modules on the graph
table(membership(mod_list)) # Number of nodes per graph module (value not printed inside a function)
# Extract information on module membership
module_membership <- as.integer(membership(mod_list))
# Create a dataframe with module membership information and node names
module_df <- data.frame(V1 = module_membership, V2 = V(g)$name)
# After merge() the "by" column (the ASV name, V2) comes first, hence the
# renaming order below: ASV_ID then Module
module_df <- merge(module_df, ID.tmp, by.x = "V2", by.y = "ASV_ID")
names(module_df) <- c(
"ASV_ID",
"Module",
"Kingdom",
"Phylum",
"Class",
"Order",
"Family",
"Genus",
"Species"
)
library(dplyr)
# Put the module number first for readability
module_df <- module_df %>% dplyr::select(
"Module",
"ASV_ID",
"Kingdom",
"Phylum",
"Class",
"Order",
"Family",
"Genus",
"Species"
)
# Saving table
write.table(module_df,
file = paste0("7_editing_figures/co-occurence_network/SpMan_network/module_details/", subset, "_subset_", compartment, "_BF.txt"),
sep = "\t",
row.names = FALSE
)
}
# Execute the function with different parameters
# One call per subset x compartment, all links ("all"). The "## ..." lines are
# console output captured in the rendered notebook: combinations for which no
# correlation table exists are skipped with a message and return NULL.
bac.fung.cross.net("Pg-spec", "seeds", "ASV", "all")
## [1] "Not enough data. Skip the co-occurence network analysis."
## NULL
bac.fung.cross.net("Ps-spec", "seeds", "ASV", "all")
bac.fung.cross.net("Pg-spec", "pulps", "ASV", "all")
bac.fung.cross.net("Ps-spec", "pulps", "ASV", "all")
bac.fung.cross.net("Pg-spec", "leaves", "ASV", "all")
bac.fung.cross.net("Ps-spec", "leaves", "ASV", "all")
## [1] "Not enough data. Skip the co-occurence network analysis."
## NULL
bac.fung.cross.net("Pg-spec", "roots", "ASV", "all")
bac.fung.cross.net("Ps-spec", "roots", "ASV", "all")
bac.fung.cross.net("Pg-spec", "soil", "ASV", "all")
bac.fung.cross.net("Ps-spec", "soil", "ASV", "all")
Graph and module information
rm(list = ls(all.names = TRUE)) # Clean the environment
setwd(dir = "E:/Thèse - BioMiMe/Articles/Metabarcoding Pg x Ps/NGS_analysis_Pg.Ps/work/biostat_analysis") # Set directory
# Gather the per-network property tables (one-row data frames saved as .RDS
# by bac.fung.cross.net) and assemble them into one summary table.
# Path to the directory containing the .RDS files
path <- "7_editing_figures/co-occurence_network/SpMan_network/property"
# Obtain the list of .RDS files in the directory
files <- list.files(path = path, pattern = "\\.RDS$", full.names = TRUE)
# Fail early with a clear message instead of the cryptic error that
# rbind()/write.table() would otherwise raise on an empty list
if (length(files) == 0) {
  stop("No .RDS property files found in ", path, call. = FALSE)
}
# Load every .RDS file into a named list (name = file name without extension)
list_df <- lapply(files, readRDS)
names(list_df) <- tools::file_path_sans_ext(basename(files))
# Assemble all dataframes into a single dataframe; rbind() already propagates
# the column names of the first element, so no extra colnames() step is needed
df <- do.call(rbind, list_df)
# Save dataframe
write.table(df,
  file = "7_editing_figures/co-occurence_network/SpMan_network/property/network_statistics.txt",
  sep = "\t",
  row.names = FALSE
)
# Display table
library(DT)
print(datatable(df, width = NULL, height = NULL, caption = "Network statistics"))
show.tab <- function(type, subset, compartment) {
  # Display the module-membership table written by bac.fung.cross.net() as an
  # interactive datatable; prints a message and returns NULL when no table
  # exists for this combination.
  # type = "ASV"
  # subset = "Pg" or "Ps"
  # compartment = "seeds", "pulps", "leaves", "roots" or "soil"
  tab.path <- paste0(
    "7_editing_figures/co-occurence_network/SpMan_network/module_details/",
    subset, "_subset_", compartment, "_BF.txt"
  )
  # Guard clause: nothing to show when the network analysis was skipped
  if (!file.exists(tab.path)) {
    print("No avalaible data.")
    return()
  }
  # Loading table
  tab <- read.table(
    file = tab.path,
    sep = "\t",
    header = TRUE
  )
  # Display table
  library(DT)
  print(datatable(tab,
    width = NULL, height = NULL, rownames = FALSE,
    caption = paste0(subset, "_", compartment, " (", type, ")")
  ))
}
# Display each module-membership table in turn. The "## ..." lines are console
# output captured in the rendered notebook: combinations for which no module
# table was written print a message and return NULL.
show.tab("ASV", "Pg-spec", "seeds")
## [1] "No avalaible data."
## NULL
show.tab("ASV", "Ps-spec", "seeds")
show.tab("ASV", "Pg-spec", "pulps")
show.tab("ASV", "Ps-spec", "pulps")
show.tab("ASV", "Pg-spec", "leaves")
show.tab("ASV", "Ps-spec", "leaves")
## [1] "No avalaible data."
## NULL
show.tab("ASV", "Pg-spec", "roots")
show.tab("ASV", "Ps-spec", "roots")
show.tab("ASV", "Pg-spec", "soil")
show.tab("ASV", "Ps-spec", "soil")